diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,14809 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 6712, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.005216484089723527, + "grad_norm": 13.952567215390388, + "learning_rate": 2.3809523809523811e-07, + "loss": 0.6702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.342024564743042, + "step": 5, + "valid_targets_mean": 7214.4, + "valid_targets_min": 4847 + }, + { + "epoch": 0.010432968179447054, + "grad_norm": 13.17170449558287, + "learning_rate": 5.357142857142857e-07, + "loss": 0.6198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3091505169868469, + "step": 10, + "valid_targets_mean": 6417.1, + "valid_targets_min": 4480 + }, + { + "epoch": 0.01564945226917058, + "grad_norm": 13.063636706792444, + "learning_rate": 8.333333333333333e-07, + "loss": 0.6626, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33068743348121643, + "step": 15, + "valid_targets_mean": 7605.4, + "valid_targets_min": 5236 + }, + { + "epoch": 0.020865936358894107, + "grad_norm": 12.458527626022551, + "learning_rate": 1.130952380952381e-06, + "loss": 0.6566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2945317029953003, + "step": 20, + "valid_targets_mean": 5827.9, + "valid_targets_min": 4727 + }, + { + "epoch": 0.02608242044861763, + "grad_norm": 8.995964015786168, + "learning_rate": 1.4285714285714286e-06, + "loss": 0.6386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3329887390136719, + "step": 25, + "valid_targets_mean": 6448.2, + "valid_targets_min": 4211 + }, + { + "epoch": 0.03129890453834116, + "grad_norm": 7.112373586688576, + "learning_rate": 1.7261904761904764e-06, + "loss": 0.5872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3238425552845001, + "step": 30, + "valid_targets_mean": 6108.1, + "valid_targets_min": 4279 + }, + { + "epoch": 0.036515388628064686, + "grad_norm": 5.570774823347275, + "learning_rate": 2.023809523809524e-06, + "loss": 0.5426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2542545795440674, + "step": 35, + "valid_targets_mean": 5962.4, + "valid_targets_min": 4589 + }, + { + "epoch": 0.041731872717788214, + "grad_norm": 6.180455246044265, + "learning_rate": 2.321428571428572e-06, + "loss": 0.5198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22422923147678375, + "step": 40, + "valid_targets_mean": 3503.2, + "valid_targets_min": 2015 + }, + { + "epoch": 0.046948356807511735, + "grad_norm": 2.4315840711583823, + "learning_rate": 2.6190476190476192e-06, + "loss": 0.4393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19161885976791382, + "step": 45, + "valid_targets_mean": 6102.5, + "valid_targets_min": 4778 + }, + { + "epoch": 0.05216484089723526, + "grad_norm": 1.7765776990100113, + "learning_rate": 2.916666666666667e-06, + "loss": 0.4159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1991199254989624, + "step": 50, + "valid_targets_mean": 5868.2, + "valid_targets_min": 4612 + }, + { + "epoch": 0.05738132498695879, + "grad_norm": 1.292787648339216, + "learning_rate": 3.2142857142857147e-06, + "loss": 0.4238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2173127382993698, + "step": 55, + "valid_targets_mean": 6105.8, + "valid_targets_min": 4491 + }, + { + "epoch": 0.06259780907668232, + "grad_norm": 0.9804304862760956, + "learning_rate": 3.511904761904762e-06, + "loss": 0.3851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1795063465833664, + "step": 60, + "valid_targets_mean": 6962.0, + "valid_targets_min": 4754 + }, + { + "epoch": 0.06781429316640585, + "grad_norm": 0.9118344834712908, + "learning_rate": 3.80952380952381e-06, + "loss": 0.3759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19778475165367126, + "step": 65, + "valid_targets_mean": 6035.1, + "valid_targets_min": 4787 + }, + { + "epoch": 0.07303077725612937, + "grad_norm": 0.7711763389757269, + "learning_rate": 4.107142857142857e-06, + "loss": 0.3659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18539223074913025, + "step": 70, + "valid_targets_mean": 6028.5, + "valid_targets_min": 5213 + }, + { + "epoch": 0.0782472613458529, + "grad_norm": 0.605792293229026, + "learning_rate": 4.404761904761905e-06, + "loss": 0.3482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15124306082725525, + "step": 75, + "valid_targets_mean": 6814.1, + "valid_targets_min": 4664 + }, + { + "epoch": 0.08346374543557643, + "grad_norm": 0.649524812516737, + "learning_rate": 4.702380952380953e-06, + "loss": 0.3617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1689397543668747, + "step": 80, + "valid_targets_mean": 6281.5, + "valid_targets_min": 4996 + }, + { + "epoch": 0.08868022952529994, + "grad_norm": 0.8359186274138423, + "learning_rate": 5e-06, + "loss": 0.3462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16392885148525238, + "step": 85, + "valid_targets_mean": 6628.9, + "valid_targets_min": 4582 + }, + { + "epoch": 0.09389671361502347, + "grad_norm": 0.551872234081218, + "learning_rate": 5.297619047619048e-06, + "loss": 0.3299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14849244058132172, + "step": 90, + "valid_targets_mean": 6536.6, + "valid_targets_min": 5205 + }, + { + "epoch": 0.099113197704747, + "grad_norm": 0.5211969129925008, + "learning_rate": 5.595238095238096e-06, + "loss": 0.3131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1576332449913025, + "step": 95, + "valid_targets_mean": 7031.0, + "valid_targets_min": 5098 + }, + { + "epoch": 0.10432968179447052, + "grad_norm": 0.5251010382126695, + "learning_rate": 5.892857142857144e-06, + "loss": 0.3205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17972427606582642, + "step": 100, + "valid_targets_mean": 6514.2, + "valid_targets_min": 5016 + }, + { + "epoch": 0.10954616588419405, + "grad_norm": 0.5396788095201667, + "learning_rate": 6.1904761904761914e-06, + "loss": 0.3511, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2024439573287964, + "step": 105, + "valid_targets_mean": 6330.0, + "valid_targets_min": 965 + }, + { + "epoch": 0.11476264997391758, + "grad_norm": 0.5052137939412624, + "learning_rate": 6.488095238095239e-06, + "loss": 0.3051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15431496500968933, + "step": 110, + "valid_targets_mean": 6293.1, + "valid_targets_min": 5218 + }, + { + "epoch": 0.11997913406364111, + "grad_norm": 0.5387465343464581, + "learning_rate": 6.785714285714287e-06, + "loss": 0.3055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13877981901168823, + "step": 115, + "valid_targets_mean": 6957.4, + "valid_targets_min": 4213 + }, + { + "epoch": 0.12519561815336464, + "grad_norm": 0.5357533765308142, + "learning_rate": 7.083333333333335e-06, + "loss": 0.3235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1695391833782196, + "step": 120, + "valid_targets_mean": 5888.5, + "valid_targets_min": 4923 + }, + { + "epoch": 0.13041210224308816, + "grad_norm": 0.48795368210762313, + "learning_rate": 7.380952380952382e-06, + "loss": 0.3366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2015056312084198, + "step": 125, + "valid_targets_mean": 7822.9, + "valid_targets_min": 4993 + }, + { + "epoch": 0.1356285863328117, + "grad_norm": 0.6730933782791901, + "learning_rate": 7.67857142857143e-06, + "loss": 0.3314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1623677760362625, + "step": 130, + "valid_targets_mean": 6098.6, + "valid_targets_min": 4872 + }, + { + "epoch": 0.14084507042253522, + "grad_norm": 0.49991429105966795, + "learning_rate": 7.976190476190477e-06, + "loss": 0.3036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1553228348493576, + "step": 135, + "valid_targets_mean": 6892.2, + "valid_targets_min": 4706 + }, + { + "epoch": 0.14606155451225875, + "grad_norm": 0.6125620842457005, + "learning_rate": 8.273809523809523e-06, + "loss": 0.2994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13916191458702087, + "step": 140, + "valid_targets_mean": 6352.0, + "valid_targets_min": 4889 + }, + { + "epoch": 0.15127803860198227, + "grad_norm": 0.5422905436002668, + "learning_rate": 8.571428571428571e-06, + "loss": 0.2978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1404762864112854, + "step": 145, + "valid_targets_mean": 5948.9, + "valid_targets_min": 3888 + }, + { + "epoch": 0.1564945226917058, + "grad_norm": 0.6347162236879426, + "learning_rate": 8.869047619047619e-06, + "loss": 0.3029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13690190017223358, + "step": 150, + "valid_targets_mean": 5846.2, + "valid_targets_min": 4051 + }, + { + "epoch": 0.16171100678142933, + "grad_norm": 0.4697433365651612, + "learning_rate": 9.166666666666666e-06, + "loss": 0.2883, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14216038584709167, + "step": 155, + "valid_targets_mean": 7319.2, + "valid_targets_min": 5230 + }, + { + "epoch": 0.16692749087115286, + "grad_norm": 0.5131167817509925, + "learning_rate": 9.464285714285714e-06, + "loss": 0.2919, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14354196190834045, + "step": 160, + "valid_targets_mean": 8235.1, + "valid_targets_min": 4997 + }, + { + "epoch": 0.17214397496087636, + "grad_norm": 0.5503937475977374, + "learning_rate": 9.761904761904762e-06, + "loss": 0.2917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15024146437644958, + "step": 165, + "valid_targets_mean": 6402.5, + "valid_targets_min": 5364 + }, + { + "epoch": 0.17736045905059988, + "grad_norm": 0.5277697834211738, + "learning_rate": 1.005952380952381e-05, + "loss": 0.2804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16012150049209595, + "step": 170, + "valid_targets_mean": 7117.4, + "valid_targets_min": 3391 + }, + { + "epoch": 0.1825769431403234, + "grad_norm": 0.4564638560196473, + "learning_rate": 1.0357142857142859e-05, + "loss": 0.2494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13276173174381256, + "step": 175, + "valid_targets_mean": 7542.8, + "valid_targets_min": 4829 + }, + { + "epoch": 0.18779342723004694, + "grad_norm": 0.5357862664302611, + "learning_rate": 1.0654761904761905e-05, + "loss": 0.2822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12922212481498718, + "step": 180, + "valid_targets_mean": 6284.4, + "valid_targets_min": 5180 + }, + { + "epoch": 0.19300991131977047, + "grad_norm": 0.5669263676930952, + "learning_rate": 1.0952380952380955e-05, + "loss": 0.2873, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14764404296875, + "step": 185, + "valid_targets_mean": 5806.2, + "valid_targets_min": 5101 + }, + { + "epoch": 0.198226395409494, + "grad_norm": 0.5073884187777999, + "learning_rate": 1.125e-05, + "loss": 0.2827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1298997700214386, + "step": 190, + "valid_targets_mean": 6697.9, + "valid_targets_min": 4527 + }, + { + "epoch": 0.20344287949921752, + "grad_norm": 0.5063037127935047, + "learning_rate": 1.1547619047619047e-05, + "loss": 0.274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15421220660209656, + "step": 195, + "valid_targets_mean": 6410.4, + "valid_targets_min": 3082 + }, + { + "epoch": 0.20865936358894105, + "grad_norm": 0.49999533579965877, + "learning_rate": 1.1845238095238096e-05, + "loss": 0.2563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13167908787727356, + "step": 200, + "valid_targets_mean": 6847.1, + "valid_targets_min": 4685 + }, + { + "epoch": 0.21387584767866458, + "grad_norm": 0.6999993973408437, + "learning_rate": 1.2142857142857142e-05, + "loss": 0.2755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13832241296768188, + "step": 205, + "valid_targets_mean": 6194.0, + "valid_targets_min": 4644 + }, + { + "epoch": 0.2190923317683881, + "grad_norm": 0.4811213980124768, + "learning_rate": 1.2440476190476192e-05, + "loss": 0.274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12116702646017075, + "step": 210, + "valid_targets_mean": 6283.8, + "valid_targets_min": 4433 + }, + { + "epoch": 0.22430881585811163, + "grad_norm": 0.5084172919709663, + "learning_rate": 1.2738095238095238e-05, + "loss": 0.2645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1295085996389389, + "step": 215, + "valid_targets_mean": 5917.2, + "valid_targets_min": 4857 + }, + { + "epoch": 0.22952529994783516, + "grad_norm": 0.5605253281532208, + "learning_rate": 1.3035714285714287e-05, + "loss": 0.2754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16089364886283875, + "step": 220, + "valid_targets_mean": 6986.4, + "valid_targets_min": 5101 + }, + { + "epoch": 0.2347417840375587, + "grad_norm": 0.5786777796777427, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.2712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11480909585952759, + "step": 225, + "valid_targets_mean": 5299.5, + "valid_targets_min": 4442 + }, + { + "epoch": 0.23995826812728221, + "grad_norm": 0.5013407308623012, + "learning_rate": 1.3630952380952383e-05, + "loss": 0.2358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10149524360895157, + "step": 230, + "valid_targets_mean": 6133.9, + "valid_targets_min": 5130 + }, + { + "epoch": 0.24517475221700574, + "grad_norm": 0.6256106324180384, + "learning_rate": 1.3928571428571429e-05, + "loss": 0.2394, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13665564358234406, + "step": 235, + "valid_targets_mean": 6477.4, + "valid_targets_min": 4162 + }, + { + "epoch": 0.25039123630672927, + "grad_norm": 0.8127933423976271, + "learning_rate": 1.4226190476190478e-05, + "loss": 0.2464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11668835580348969, + "step": 240, + "valid_targets_mean": 6350.4, + "valid_targets_min": 3940 + }, + { + "epoch": 0.2556077203964528, + "grad_norm": 0.5386894105873504, + "learning_rate": 1.4523809523809524e-05, + "loss": 0.2567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14738333225250244, + "step": 245, + "valid_targets_mean": 6625.1, + "valid_targets_min": 4642 + }, + { + "epoch": 0.2608242044861763, + "grad_norm": 0.6632680337779865, + "learning_rate": 1.4821428571428574e-05, + "loss": 0.2908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14054007828235626, + "step": 250, + "valid_targets_mean": 4907.0, + "valid_targets_min": 4431 + }, + { + "epoch": 0.26604068857589985, + "grad_norm": 0.5317194617067228, + "learning_rate": 1.511904761904762e-05, + "loss": 0.2986, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13532723486423492, + "step": 255, + "valid_targets_mean": 6465.0, + "valid_targets_min": 5333 + }, + { + "epoch": 0.2712571726656234, + "grad_norm": 0.5619269623945187, + "learning_rate": 1.5416666666666668e-05, + "loss": 0.2484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13153497874736786, + "step": 260, + "valid_targets_mean": 5650.5, + "valid_targets_min": 4769 + }, + { + "epoch": 0.2764736567553469, + "grad_norm": 0.560845136373923, + "learning_rate": 1.5714285714285715e-05, + "loss": 0.259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12566320598125458, + "step": 265, + "valid_targets_mean": 5540.0, + "valid_targets_min": 4281 + }, + { + "epoch": 0.28169014084507044, + "grad_norm": 0.5654604319502452, + "learning_rate": 1.6011904761904763e-05, + "loss": 0.2614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17218875885009766, + "step": 270, + "valid_targets_mean": 6499.0, + "valid_targets_min": 4844 + }, + { + "epoch": 0.28690662493479396, + "grad_norm": 0.5233428145807639, + "learning_rate": 1.630952380952381e-05, + "loss": 0.2752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11128735542297363, + "step": 275, + "valid_targets_mean": 5736.5, + "valid_targets_min": 5227 + }, + { + "epoch": 0.2921231090245175, + "grad_norm": 0.47570376634873135, + "learning_rate": 1.660714285714286e-05, + "loss": 0.2501, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09911037981510162, + "step": 280, + "valid_targets_mean": 5857.2, + "valid_targets_min": 2461 + }, + { + "epoch": 0.297339593114241, + "grad_norm": 0.49129165768615857, + "learning_rate": 1.6904761904761906e-05, + "loss": 0.266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11322127282619476, + "step": 285, + "valid_targets_mean": 5793.4, + "valid_targets_min": 2815 + }, + { + "epoch": 0.30255607720396455, + "grad_norm": 0.49192424843510896, + "learning_rate": 1.7202380952380954e-05, + "loss": 0.2548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10852590203285217, + "step": 290, + "valid_targets_mean": 6588.8, + "valid_targets_min": 4875 + }, + { + "epoch": 0.3077725612936881, + "grad_norm": 0.5567515492668403, + "learning_rate": 1.7500000000000002e-05, + "loss": 0.233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11411102861166, + "step": 295, + "valid_targets_mean": 5984.8, + "valid_targets_min": 4901 + }, + { + "epoch": 0.3129890453834116, + "grad_norm": 0.4591946224537777, + "learning_rate": 1.779761904761905e-05, + "loss": 0.2484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11157451570034027, + "step": 300, + "valid_targets_mean": 6067.4, + "valid_targets_min": 3786 + }, + { + "epoch": 0.31820552947313513, + "grad_norm": 0.529182603864311, + "learning_rate": 1.8095238095238097e-05, + "loss": 0.3019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12380390614271164, + "step": 305, + "valid_targets_mean": 5852.5, + "valid_targets_min": 5057 + }, + { + "epoch": 0.32342201356285866, + "grad_norm": 0.49313061810148234, + "learning_rate": 1.8392857142857142e-05, + "loss": 0.2378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13214895129203796, + "step": 310, + "valid_targets_mean": 6362.2, + "valid_targets_min": 3952 + }, + { + "epoch": 0.3286384976525822, + "grad_norm": 0.629976579067269, + "learning_rate": 1.8690476190476193e-05, + "loss": 0.2556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1273614466190338, + "step": 315, + "valid_targets_mean": 5360.2, + "valid_targets_min": 4267 + }, + { + "epoch": 0.3338549817423057, + "grad_norm": 0.547331137177352, + "learning_rate": 1.8988095238095237e-05, + "loss": 0.2623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14697670936584473, + "step": 320, + "valid_targets_mean": 6842.1, + "valid_targets_min": 4785 + }, + { + "epoch": 0.33907146583202924, + "grad_norm": 0.48734578011312796, + "learning_rate": 1.928571428571429e-05, + "loss": 0.231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11150479316711426, + "step": 325, + "valid_targets_mean": 6188.5, + "valid_targets_min": 5438 + }, + { + "epoch": 0.3442879499217527, + "grad_norm": 0.5291082388162386, + "learning_rate": 1.9583333333333333e-05, + "loss": 0.2354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1297616958618164, + "step": 330, + "valid_targets_mean": 5848.6, + "valid_targets_min": 4734 + }, + { + "epoch": 0.34950443401147624, + "grad_norm": 0.4991023204044274, + "learning_rate": 1.9880952380952384e-05, + "loss": 0.2213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10059548914432526, + "step": 335, + "valid_targets_mean": 6278.2, + "valid_targets_min": 5636 + }, + { + "epoch": 0.35472091810119977, + "grad_norm": 0.486040056991869, + "learning_rate": 2.0178571428571428e-05, + "loss": 0.2448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1058349534869194, + "step": 340, + "valid_targets_mean": 6168.9, + "valid_targets_min": 4206 + }, + { + "epoch": 0.3599374021909233, + "grad_norm": 0.48251001258698706, + "learning_rate": 2.0476190476190476e-05, + "loss": 0.2343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10463644564151764, + "step": 345, + "valid_targets_mean": 6226.5, + "valid_targets_min": 4535 + }, + { + "epoch": 0.3651538862806468, + "grad_norm": 0.43326046684886643, + "learning_rate": 2.0773809523809527e-05, + "loss": 0.2352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11551818251609802, + "step": 350, + "valid_targets_mean": 7821.6, + "valid_targets_min": 5471 + }, + { + "epoch": 0.37037037037037035, + "grad_norm": 0.44150421157239794, + "learning_rate": 2.1071428571428575e-05, + "loss": 0.2434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10282167792320251, + "step": 355, + "valid_targets_mean": 6843.0, + "valid_targets_min": 5243 + }, + { + "epoch": 0.3755868544600939, + "grad_norm": 0.5535900149894346, + "learning_rate": 2.136904761904762e-05, + "loss": 0.2264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1394716501235962, + "step": 360, + "valid_targets_mean": 6658.6, + "valid_targets_min": 4906 + }, + { + "epoch": 0.3808033385498174, + "grad_norm": 0.5370926712700428, + "learning_rate": 2.1666666666666667e-05, + "loss": 0.2519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12328899651765823, + "step": 365, + "valid_targets_mean": 6799.5, + "valid_targets_min": 5168 + }, + { + "epoch": 0.38601982263954093, + "grad_norm": 0.5779359461319825, + "learning_rate": 2.1964285714285718e-05, + "loss": 0.2566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13125546276569366, + "step": 370, + "valid_targets_mean": 5376.2, + "valid_targets_min": 3130 + }, + { + "epoch": 0.39123630672926446, + "grad_norm": 0.8248922606305263, + "learning_rate": 2.2261904761904766e-05, + "loss": 0.3344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20462170243263245, + "step": 375, + "valid_targets_mean": 5092.5, + "valid_targets_min": 1971 + }, + { + "epoch": 0.396452790818988, + "grad_norm": 0.6526420849802459, + "learning_rate": 2.255952380952381e-05, + "loss": 0.4074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2032327651977539, + "step": 380, + "valid_targets_mean": 5324.1, + "valid_targets_min": 1631 + }, + { + "epoch": 0.4016692749087115, + "grad_norm": 0.6790916058673229, + "learning_rate": 2.2857142857142858e-05, + "loss": 0.4028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24990153312683105, + "step": 385, + "valid_targets_mean": 6070.8, + "valid_targets_min": 3294 + }, + { + "epoch": 0.40688575899843504, + "grad_norm": 0.6593868352995019, + "learning_rate": 2.315476190476191e-05, + "loss": 0.381, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1769995093345642, + "step": 390, + "valid_targets_mean": 4407.1, + "valid_targets_min": 2736 + }, + { + "epoch": 0.41210224308815857, + "grad_norm": 0.6145314966552541, + "learning_rate": 2.3452380952380957e-05, + "loss": 0.3782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14936703443527222, + "step": 395, + "valid_targets_mean": 3646.2, + "valid_targets_min": 980 + }, + { + "epoch": 0.4173187271778821, + "grad_norm": 0.5816621860431147, + "learning_rate": 2.375e-05, + "loss": 0.3937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17872054874897003, + "step": 400, + "valid_targets_mean": 5067.1, + "valid_targets_min": 1512 + }, + { + "epoch": 0.4225352112676056, + "grad_norm": 0.8464660280638143, + "learning_rate": 2.404761904761905e-05, + "loss": 0.3631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.188622385263443, + "step": 405, + "valid_targets_mean": 4779.2, + "valid_targets_min": 2405 + }, + { + "epoch": 0.42775169535732915, + "grad_norm": 0.5627851823694907, + "learning_rate": 2.4345238095238093e-05, + "loss": 0.3716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14649832248687744, + "step": 410, + "valid_targets_mean": 3286.4, + "valid_targets_min": 1654 + }, + { + "epoch": 0.4329681794470527, + "grad_norm": 0.5549193615673366, + "learning_rate": 2.4642857142857145e-05, + "loss": 0.3675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17032572627067566, + "step": 415, + "valid_targets_mean": 5448.2, + "valid_targets_min": 4644 + }, + { + "epoch": 0.4381846635367762, + "grad_norm": 0.6193042159377287, + "learning_rate": 2.4940476190476192e-05, + "loss": 0.3865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16815653443336487, + "step": 420, + "valid_targets_mean": 4343.2, + "valid_targets_min": 2003 + }, + { + "epoch": 0.44340114762649974, + "grad_norm": 0.5585921649267166, + "learning_rate": 2.523809523809524e-05, + "loss": 0.368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1892159879207611, + "step": 425, + "valid_targets_mean": 6736.4, + "valid_targets_min": 953 + }, + { + "epoch": 0.44861763171622326, + "grad_norm": 0.6011653585282667, + "learning_rate": 2.5535714285714284e-05, + "loss": 0.3671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16570714116096497, + "step": 430, + "valid_targets_mean": 4140.9, + "valid_targets_min": 1265 + }, + { + "epoch": 0.4538341158059468, + "grad_norm": 0.5668964485866105, + "learning_rate": 2.5833333333333336e-05, + "loss": 0.3674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.166670024394989, + "step": 435, + "valid_targets_mean": 4903.1, + "valid_targets_min": 1982 + }, + { + "epoch": 0.4590505998956703, + "grad_norm": 0.5876830092759732, + "learning_rate": 2.6130952380952383e-05, + "loss": 0.3696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16399607062339783, + "step": 440, + "valid_targets_mean": 4012.9, + "valid_targets_min": 1759 + }, + { + "epoch": 0.46426708398539385, + "grad_norm": 0.7412534025295432, + "learning_rate": 2.642857142857143e-05, + "loss": 0.3778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17856431007385254, + "step": 445, + "valid_targets_mean": 3281.4, + "valid_targets_min": 1451 + }, + { + "epoch": 0.4694835680751174, + "grad_norm": 0.7003561348163779, + "learning_rate": 2.6726190476190475e-05, + "loss": 0.3764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19483688473701477, + "step": 450, + "valid_targets_mean": 3966.2, + "valid_targets_min": 1861 + }, + { + "epoch": 0.4747000521648409, + "grad_norm": 0.6305956621541107, + "learning_rate": 2.7023809523809527e-05, + "loss": 0.3657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1725791096687317, + "step": 455, + "valid_targets_mean": 4078.9, + "valid_targets_min": 2100 + }, + { + "epoch": 0.47991653625456443, + "grad_norm": 0.6618844022970122, + "learning_rate": 2.7321428571428574e-05, + "loss": 0.3837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19594892859458923, + "step": 460, + "valid_targets_mean": 3775.8, + "valid_targets_min": 2767 + }, + { + "epoch": 0.48513302034428796, + "grad_norm": 0.798636666769194, + "learning_rate": 2.7619047619047622e-05, + "loss": 0.3854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22000110149383545, + "step": 465, + "valid_targets_mean": 3907.5, + "valid_targets_min": 1205 + }, + { + "epoch": 0.4903495044340115, + "grad_norm": 0.6895002140390145, + "learning_rate": 2.7916666666666666e-05, + "loss": 0.3711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15861332416534424, + "step": 470, + "valid_targets_mean": 4297.4, + "valid_targets_min": 2004 + }, + { + "epoch": 0.495565988523735, + "grad_norm": 0.6242113787789374, + "learning_rate": 2.8214285714285718e-05, + "loss": 0.3782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17085091769695282, + "step": 475, + "valid_targets_mean": 3638.5, + "valid_targets_min": 1020 + }, + { + "epoch": 0.5007824726134585, + "grad_norm": 0.6864910172052782, + "learning_rate": 2.8511904761904765e-05, + "loss": 0.3572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2075367569923401, + "step": 480, + "valid_targets_mean": 4024.0, + "valid_targets_min": 2130 + }, + { + "epoch": 0.5059989567031821, + "grad_norm": 0.7132903349587734, + "learning_rate": 2.8809523809523813e-05, + "loss": 0.3518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1828821748495102, + "step": 485, + "valid_targets_mean": 4524.0, + "valid_targets_min": 2046 + }, + { + "epoch": 0.5112154407929056, + "grad_norm": 0.7059910056116537, + "learning_rate": 2.9107142857142857e-05, + "loss": 0.3563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25037163496017456, + "step": 490, + "valid_targets_mean": 4543.6, + "valid_targets_min": 3373 + }, + { + "epoch": 0.5164319248826291, + "grad_norm": 0.678777094251199, + "learning_rate": 2.940476190476191e-05, + "loss": 0.3672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16667532920837402, + "step": 495, + "valid_targets_mean": 3376.9, + "valid_targets_min": 1747 + }, + { + "epoch": 0.5216484089723527, + "grad_norm": 0.642730758797157, + "learning_rate": 2.9702380952380956e-05, + "loss": 0.366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1729573905467987, + "step": 500, + "valid_targets_mean": 3227.2, + "valid_targets_min": 1570 + }, + { + "epoch": 0.5268648930620762, + "grad_norm": 0.609487267048289, + "learning_rate": 3.0000000000000004e-05, + "loss": 0.3493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18624669313430786, + "step": 505, + "valid_targets_mean": 4808.2, + "valid_targets_min": 1537 + }, + { + "epoch": 0.5320813771517997, + "grad_norm": 0.5913765796977307, + "learning_rate": 3.029761904761905e-05, + "loss": 0.3493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14506345987319946, + "step": 510, + "valid_targets_mean": 4261.5, + "valid_targets_min": 1934 + }, + { + "epoch": 0.5372978612415232, + "grad_norm": 0.6635609089682186, + "learning_rate": 3.059523809523809e-05, + "loss": 0.3619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15640445053577423, + "step": 515, + "valid_targets_mean": 3327.4, + "valid_targets_min": 2562 + }, + { + "epoch": 0.5425143453312468, + "grad_norm": 0.633707748757846, + "learning_rate": 3.089285714285715e-05, + "loss": 0.3565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1799418330192566, + "step": 520, + "valid_targets_mean": 4351.6, + "valid_targets_min": 2659 + }, + { + "epoch": 0.5477308294209703, + "grad_norm": 0.6446651898964769, + "learning_rate": 3.1190476190476195e-05, + "loss": 0.3481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17147403955459595, + "step": 525, + "valid_targets_mean": 3976.4, + "valid_targets_min": 1349 + }, + { + "epoch": 0.5529473135106938, + "grad_norm": 0.684722134091853, + "learning_rate": 3.148809523809524e-05, + "loss": 0.3527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12899863719940186, + "step": 530, + "valid_targets_mean": 2862.2, + "valid_targets_min": 1367 + }, + { + "epoch": 0.5581637976004173, + "grad_norm": 0.7048938858836133, + "learning_rate": 3.1785714285714284e-05, + "loss": 0.3624, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17373277246952057, + "step": 535, + "valid_targets_mean": 3540.0, + "valid_targets_min": 2026 + }, + { + "epoch": 0.5633802816901409, + "grad_norm": 0.6816952448739764, + "learning_rate": 3.208333333333334e-05, + "loss": 0.3423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15459300577640533, + "step": 540, + "valid_targets_mean": 3112.6, + "valid_targets_min": 2033 + }, + { + "epoch": 0.5685967657798644, + "grad_norm": 0.8045656755040194, + "learning_rate": 3.2380952380952386e-05, + "loss": 0.3668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19262531399726868, + "step": 545, + "valid_targets_mean": 2677.6, + "valid_targets_min": 1285 + }, + { + "epoch": 0.5738132498695879, + "grad_norm": 0.8579704192745176, + "learning_rate": 3.2678571428571434e-05, + "loss": 0.3582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19750116765499115, + "step": 550, + "valid_targets_mean": 2929.1, + "valid_targets_min": 1821 + }, + { + "epoch": 0.5790297339593115, + "grad_norm": 0.6633189210564149, + "learning_rate": 3.2976190476190475e-05, + "loss": 0.3523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18241479992866516, + "step": 555, + "valid_targets_mean": 3896.9, + "valid_targets_min": 2535 + }, + { + "epoch": 0.584246218049035, + "grad_norm": 0.7368897071799573, + "learning_rate": 3.327380952380953e-05, + "loss": 0.3466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1817534863948822, + "step": 560, + "valid_targets_mean": 3107.6, + "valid_targets_min": 2080 + }, + { + "epoch": 0.5894627021387585, + "grad_norm": 1.0961742062904434, + "learning_rate": 3.357142857142858e-05, + "loss": 0.3484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1669929027557373, + "step": 565, + "valid_targets_mean": 2895.4, + "valid_targets_min": 1153 + }, + { + "epoch": 0.594679186228482, + "grad_norm": 0.6542407965721098, + "learning_rate": 3.386904761904762e-05, + "loss": 0.3413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1645527482032776, + "step": 570, + "valid_targets_mean": 3788.0, + "valid_targets_min": 1858 + }, + { + "epoch": 0.5998956703182056, + "grad_norm": 0.6998022518206112, + "learning_rate": 3.4166666666666666e-05, + "loss": 0.3585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16212904453277588, + "step": 575, + "valid_targets_mean": 3456.4, + "valid_targets_min": 1639 + }, + { + "epoch": 0.6051121544079291, + "grad_norm": 1.1077400798895365, + "learning_rate": 3.446428571428572e-05, + "loss": 0.3554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12318345904350281, + "step": 580, + "valid_targets_mean": 2912.5, + "valid_targets_min": 1394 + }, + { + "epoch": 0.6103286384976526, + "grad_norm": 0.6234711479932433, + "learning_rate": 3.476190476190477e-05, + "loss": 0.3455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16402480006217957, + "step": 585, + "valid_targets_mean": 4036.0, + "valid_targets_min": 1844 + }, + { + "epoch": 0.6155451225873761, + "grad_norm": 0.7407774765389948, + "learning_rate": 3.505952380952381e-05, + "loss": 0.3461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19531461596488953, + "step": 590, + "valid_targets_mean": 4271.6, + "valid_targets_min": 1734 + }, + { + "epoch": 0.6207616066770997, + "grad_norm": 0.6640760406792731, + "learning_rate": 3.535714285714286e-05, + "loss": 0.3419, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18357989192008972, + "step": 595, + "valid_targets_mean": 4111.2, + "valid_targets_min": 2077 + }, + { + "epoch": 0.6259780907668232, + "grad_norm": 0.6623575022355853, + "learning_rate": 3.565476190476191e-05, + "loss": 0.3482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18263480067253113, + "step": 600, + "valid_targets_mean": 4209.2, + "valid_targets_min": 2349 + }, + { + "epoch": 0.6311945748565467, + "grad_norm": 0.5873869203596194, + "learning_rate": 3.595238095238096e-05, + "loss": 0.3361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18041619658470154, + "step": 605, + "valid_targets_mean": 4188.0, + "valid_targets_min": 1607 + }, + { + "epoch": 0.6364110589462703, + "grad_norm": 0.7340769038642447, + "learning_rate": 3.625e-05, + "loss": 0.3562, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1722225397825241, + "step": 610, + "valid_targets_mean": 3289.2, + "valid_targets_min": 1507 + }, + { + "epoch": 0.6416275430359938, + "grad_norm": 0.6820100819143085, + "learning_rate": 3.654761904761905e-05, + "loss": 0.3555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16559788584709167, + "step": 615, + "valid_targets_mean": 3658.6, + "valid_targets_min": 1902 + }, + { + "epoch": 0.6468440271257173, + "grad_norm": 0.7122252987761405, + "learning_rate": 3.6845238095238096e-05, + "loss": 0.3446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15451264381408691, + "step": 620, + "valid_targets_mean": 3230.1, + "valid_targets_min": 654 + }, + { + "epoch": 0.6520605112154408, + "grad_norm": 0.6413463365353036, + "learning_rate": 3.714285714285715e-05, + "loss": 0.3469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1644289195537567, + "step": 625, + "valid_targets_mean": 3870.0, + "valid_targets_min": 2607 + }, + { + "epoch": 0.6572769953051644, + "grad_norm": 0.6113601419448348, + "learning_rate": 3.744047619047619e-05, + "loss": 0.3474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1882379949092865, + "step": 630, + "valid_targets_mean": 4594.5, + "valid_targets_min": 1739 + }, + { + "epoch": 0.6624934793948879, + "grad_norm": 0.6835158215443634, + "learning_rate": 3.773809523809524e-05, + "loss": 0.3445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16101132333278656, + "step": 635, + "valid_targets_mean": 3615.8, + "valid_targets_min": 1140 + }, + { + "epoch": 0.6677099634846114, + "grad_norm": 0.6555019155753702, + "learning_rate": 3.803571428571429e-05, + "loss": 0.3266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1729881763458252, + "step": 640, + "valid_targets_mean": 3535.9, + "valid_targets_min": 2501 + }, + { + "epoch": 0.672926447574335, + "grad_norm": 0.8547621763384377, + "learning_rate": 3.833333333333334e-05, + "loss": 0.3372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17927610874176025, + "step": 645, + "valid_targets_mean": 4050.4, + "valid_targets_min": 1298 + }, + { + "epoch": 0.6781429316640585, + "grad_norm": 0.5759701959582609, + "learning_rate": 3.863095238095238e-05, + "loss": 0.3167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17242711782455444, + "step": 650, + "valid_targets_mean": 4874.1, + "valid_targets_min": 3357 + }, + { + "epoch": 0.6833594157537819, + "grad_norm": 0.6815286648056115, + "learning_rate": 3.892857142857143e-05, + "loss": 0.3613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18129593133926392, + "step": 655, + "valid_targets_mean": 3619.9, + "valid_targets_min": 1726 + }, + { + "epoch": 0.6885758998435054, + "grad_norm": 0.607067303870676, + "learning_rate": 3.922619047619048e-05, + "loss": 0.3341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.132603719830513, + "step": 660, + "valid_targets_mean": 3198.0, + "valid_targets_min": 1743 + }, + { + "epoch": 0.693792383933229, + "grad_norm": 0.6064163869854979, + "learning_rate": 3.9523809523809526e-05, + "loss": 0.3434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18202610313892365, + "step": 665, + "valid_targets_mean": 3788.8, + "valid_targets_min": 1234 + }, + { + "epoch": 0.6990088680229525, + "grad_norm": 0.6552871902783126, + "learning_rate": 3.982142857142857e-05, + "loss": 0.347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1844119131565094, + "step": 670, + "valid_targets_mean": 4123.4, + "valid_targets_min": 2303 + }, + { + "epoch": 0.704225352112676, + "grad_norm": 0.6332742634876418, + "learning_rate": 3.999998918212333e-05, + "loss": 0.3499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17941603064537048, + "step": 675, + "valid_targets_mean": 4112.8, + "valid_targets_min": 1651 + }, + { + "epoch": 0.7094418362023995, + "grad_norm": 0.676262088347953, + "learning_rate": 3.999986748114514e-05, + "loss": 0.3419, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17904874682426453, + "step": 680, + "valid_targets_mean": 2986.9, + "valid_targets_min": 1374 + }, + { + "epoch": 0.7146583202921231, + "grad_norm": 0.6340122078077409, + "learning_rate": 3.99996105576685e-05, + "loss": 0.3296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17928726971149445, + "step": 685, + "valid_targets_mean": 4170.5, + "valid_targets_min": 1625 + }, + { + "epoch": 0.7198748043818466, + "grad_norm": 0.6338123341906368, + "learning_rate": 3.999921841343052e-05, + "loss": 0.33, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1839752495288849, + "step": 690, + "valid_targets_mean": 4429.6, + "valid_targets_min": 1966 + }, + { + "epoch": 0.7250912884715701, + "grad_norm": 0.7657133027529998, + "learning_rate": 3.999869105108254e-05, + "loss": 0.3335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16377782821655273, + "step": 695, + "valid_targets_mean": 3639.9, + "valid_targets_min": 1561 + }, + { + "epoch": 0.7303077725612936, + "grad_norm": 0.6419248561079218, + "learning_rate": 3.9998028474190154e-05, + "loss": 0.3304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17485827207565308, + "step": 700, + "valid_targets_mean": 3708.9, + "valid_targets_min": 1444 + }, + { + "epoch": 0.7355242566510172, + "grad_norm": 0.6339879002021741, + "learning_rate": 3.999723068723316e-05, + "loss": 0.3147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13724328577518463, + "step": 705, + "valid_targets_mean": 2605.8, + "valid_targets_min": 1504 + }, + { + "epoch": 0.7407407407407407, + "grad_norm": 0.6426498766915865, + "learning_rate": 3.9996297695605534e-05, + "loss": 0.3244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16333600878715515, + "step": 710, + "valid_targets_mean": 3523.2, + "valid_targets_min": 858 + }, + { + "epoch": 0.7459572248304642, + "grad_norm": 0.691674429771374, + "learning_rate": 3.999522950561537e-05, + "loss": 0.3367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19072556495666504, + "step": 715, + "valid_targets_mean": 3901.8, + "valid_targets_min": 1288 + }, + { + "epoch": 0.7511737089201878, + "grad_norm": 0.7548299855223792, + "learning_rate": 3.999402612448491e-05, + "loss": 0.358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1557808816432953, + "step": 720, + "valid_targets_mean": 3838.9, + "valid_targets_min": 1537 + }, + { + "epoch": 0.7563901930099113, + "grad_norm": 0.6778272045458328, + "learning_rate": 3.99926875603504e-05, + "loss": 0.3281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17531397938728333, + "step": 725, + "valid_targets_mean": 3873.1, + "valid_targets_min": 2972 + }, + { + "epoch": 0.7616066770996348, + "grad_norm": 0.6455097643484315, + "learning_rate": 3.9991213822262105e-05, + "loss": 0.3458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20654642581939697, + "step": 730, + "valid_targets_mean": 4917.1, + "valid_targets_min": 2013 + }, + { + "epoch": 0.7668231611893583, + "grad_norm": 0.6700390093667336, + "learning_rate": 3.9989604920184215e-05, + "loss": 0.3251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16870465874671936, + "step": 735, + "valid_targets_mean": 3359.1, + "valid_targets_min": 1744 + }, + { + "epoch": 0.7720396452790819, + "grad_norm": 0.5846041959259788, + "learning_rate": 3.99878608649948e-05, + "loss": 0.3349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1434388905763626, + "step": 740, + "valid_targets_mean": 3980.5, + "valid_targets_min": 1914 + }, + { + "epoch": 0.7772561293688054, + "grad_norm": 0.6577274990671654, + "learning_rate": 3.9985981668485694e-05, + "loss": 0.3267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13307839632034302, + "step": 745, + "valid_targets_mean": 3546.5, + "valid_targets_min": 2414 + }, + { + "epoch": 0.7824726134585289, + "grad_norm": 0.6682153628187211, + "learning_rate": 3.9983967343362476e-05, + "loss": 0.3143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17749811708927155, + "step": 750, + "valid_targets_mean": 4299.2, + "valid_targets_min": 1937 + }, + { + "epoch": 0.7876890975482524, + "grad_norm": 0.7085684478169996, + "learning_rate": 3.998181790324434e-05, + "loss": 0.3426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18148940801620483, + "step": 755, + "valid_targets_mean": 3179.6, + "valid_targets_min": 1649 + }, + { + "epoch": 0.792905581637976, + "grad_norm": 0.7070764869214875, + "learning_rate": 3.997953336266402e-05, + "loss": 0.3274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1386784166097641, + "step": 760, + "valid_targets_mean": 2905.0, + "valid_targets_min": 1463 + }, + { + "epoch": 0.7981220657276995, + "grad_norm": 0.6347885172545966, + "learning_rate": 3.997711373706768e-05, + "loss": 0.3407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15476080775260925, + "step": 765, + "valid_targets_mean": 3069.8, + "valid_targets_min": 1426 + }, + { + "epoch": 0.803338549817423, + "grad_norm": 0.6737410068082825, + "learning_rate": 3.997455904281481e-05, + "loss": 0.3413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1453080028295517, + "step": 770, + "valid_targets_mean": 3046.1, + "valid_targets_min": 1661 + }, + { + "epoch": 0.8085550339071466, + "grad_norm": 0.5770142697092243, + "learning_rate": 3.997186929717814e-05, + "loss": 0.3358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18311554193496704, + "step": 775, + "valid_targets_mean": 4702.8, + "valid_targets_min": 1646 + }, + { + "epoch": 0.8137715179968701, + "grad_norm": 0.6841291302970418, + "learning_rate": 3.996904451834349e-05, + "loss": 0.3658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18371401727199554, + "step": 780, + "valid_targets_mean": 3419.0, + "valid_targets_min": 1450 + }, + { + "epoch": 0.8189880020865936, + "grad_norm": 0.6652761168059974, + "learning_rate": 3.9966084725409675e-05, + "loss": 0.3334, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15099182724952698, + "step": 785, + "valid_targets_mean": 3187.5, + "valid_targets_min": 1625 + }, + { + "epoch": 0.8242044861763171, + "grad_norm": 0.5752849880619866, + "learning_rate": 3.996298993838836e-05, + "loss": 0.3419, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12421144545078278, + "step": 790, + "valid_targets_mean": 3247.6, + "valid_targets_min": 1174 + }, + { + "epoch": 0.8294209702660407, + "grad_norm": 0.7849607564028861, + "learning_rate": 3.995976017820392e-05, + "loss": 0.3444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19505098462104797, + "step": 795, + "valid_targets_mean": 4418.8, + "valid_targets_min": 2949 + }, + { + "epoch": 0.8346374543557642, + "grad_norm": 0.5499671961768969, + "learning_rate": 3.995639546669331e-05, + "loss": 0.345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20089514553546906, + "step": 800, + "valid_targets_mean": 5217.8, + "valid_targets_min": 2652 + }, + { + "epoch": 0.8398539384454877, + "grad_norm": 0.5830494048364858, + "learning_rate": 3.995289582660593e-05, + "loss": 0.3388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16196425259113312, + "step": 805, + "valid_targets_mean": 3643.0, + "valid_targets_min": 1381 + }, + { + "epoch": 0.8450704225352113, + "grad_norm": 0.7404835120505795, + "learning_rate": 3.9949261281603415e-05, + "loss": 0.3424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16726821660995483, + "step": 810, + "valid_targets_mean": 4266.9, + "valid_targets_min": 637 + }, + { + "epoch": 0.8502869066249348, + "grad_norm": 0.6178665659589526, + "learning_rate": 3.9945491856259556e-05, + "loss": 0.3412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14165301620960236, + "step": 815, + "valid_targets_mean": 3250.2, + "valid_targets_min": 719 + }, + { + "epoch": 0.8555033907146583, + "grad_norm": 0.5361141011950915, + "learning_rate": 3.994158757606009e-05, + "loss": 0.3391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1389181911945343, + "step": 820, + "valid_targets_mean": 4282.4, + "valid_targets_min": 3210 + }, + { + "epoch": 0.8607198748043818, + "grad_norm": 0.6534772485541657, + "learning_rate": 3.993754846740249e-05, + "loss": 0.3266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17671330273151398, + "step": 825, + "valid_targets_mean": 4079.6, + "valid_targets_min": 1372 + }, + { + "epoch": 0.8659363588941054, + "grad_norm": 0.716751911406342, + "learning_rate": 3.9933374557595875e-05, + "loss": 0.3155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19154828786849976, + "step": 830, + "valid_targets_mean": 3904.6, + "valid_targets_min": 2652 + }, + { + "epoch": 0.8711528429838289, + "grad_norm": 0.5866842813395042, + "learning_rate": 3.992906587486076e-05, + "loss": 0.3309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13917279243469238, + "step": 835, + "valid_targets_mean": 2972.8, + "valid_targets_min": 1576 + }, + { + "epoch": 0.8763693270735524, + "grad_norm": 0.6145391207404436, + "learning_rate": 3.992462244832886e-05, + "loss": 0.3226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17863133549690247, + "step": 840, + "valid_targets_mean": 4349.8, + "valid_targets_min": 1841 + }, + { + "epoch": 0.881585811163276, + "grad_norm": 0.619565774689367, + "learning_rate": 3.9920044308042956e-05, + "loss": 0.3418, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15874744951725006, + "step": 845, + "valid_targets_mean": 3644.6, + "valid_targets_min": 1728 + }, + { + "epoch": 0.8868022952529995, + "grad_norm": 0.6722706818538441, + "learning_rate": 3.991533148495662e-05, + "loss": 0.3343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18185463547706604, + "step": 850, + "valid_targets_mean": 3767.8, + "valid_targets_min": 1362 + }, + { + "epoch": 0.892018779342723, + "grad_norm": 0.5778048961711596, + "learning_rate": 3.991048401093405e-05, + "loss": 0.3314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1619892120361328, + "step": 855, + "valid_targets_mean": 3784.6, + "valid_targets_min": 852 + }, + { + "epoch": 0.8972352634324465, + "grad_norm": 0.7368724545478161, + "learning_rate": 3.990550191874985e-05, + "loss": 0.3365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1643509417772293, + "step": 860, + "valid_targets_mean": 2665.8, + "valid_targets_min": 1195 + }, + { + "epoch": 0.9024517475221701, + "grad_norm": 0.5624110545412722, + "learning_rate": 3.990038524208878e-05, + "loss": 0.3279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19632196426391602, + "step": 865, + "valid_targets_mean": 5114.6, + "valid_targets_min": 2264 + }, + { + "epoch": 0.9076682316118936, + "grad_norm": 0.5952967287959974, + "learning_rate": 3.9895134015545565e-05, + "loss": 0.3184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1296868771314621, + "step": 870, + "valid_targets_mean": 3751.8, + "valid_targets_min": 1975 + }, + { + "epoch": 0.9128847157016171, + "grad_norm": 0.5967739380814835, + "learning_rate": 3.988974827462464e-05, + "loss": 0.3116, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17330387234687805, + "step": 875, + "valid_targets_mean": 4188.6, + "valid_targets_min": 2426 + }, + { + "epoch": 0.9181011997913406, + "grad_norm": 0.502869758248566, + "learning_rate": 3.98842280557399e-05, + "loss": 0.3174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16340631246566772, + "step": 880, + "valid_targets_mean": 6574.2, + "valid_targets_min": 2595 + }, + { + "epoch": 0.9233176838810642, + "grad_norm": 0.5935990113211929, + "learning_rate": 3.98785733962145e-05, + "loss": 0.3266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17132440209388733, + "step": 885, + "valid_targets_mean": 3589.6, + "valid_targets_min": 2826 + }, + { + "epoch": 0.9285341679707877, + "grad_norm": 0.6300490066993281, + "learning_rate": 3.9872784334280555e-05, + "loss": 0.3118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13730578124523163, + "step": 890, + "valid_targets_mean": 3107.8, + "valid_targets_min": 993 + }, + { + "epoch": 0.9337506520605112, + "grad_norm": 0.6623634414262182, + "learning_rate": 3.9866860909078876e-05, + "loss": 0.3178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1537298709154129, + "step": 895, + "valid_targets_mean": 3915.2, + "valid_targets_min": 1575 + }, + { + "epoch": 0.9389671361502347, + "grad_norm": 0.6762811017549515, + "learning_rate": 3.9860803160658756e-05, + "loss": 0.3211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18959565460681915, + "step": 900, + "valid_targets_mean": 3818.4, + "valid_targets_min": 1360 + }, + { + "epoch": 0.9441836202399583, + "grad_norm": 0.5852958880691829, + "learning_rate": 3.985461112997766e-05, + "loss": 0.3152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14180879294872284, + "step": 905, + "valid_targets_mean": 3653.1, + "valid_targets_min": 2414 + }, + { + "epoch": 0.9494001043296818, + "grad_norm": 0.5877461764677018, + "learning_rate": 3.9848284858900955e-05, + "loss": 0.3414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18998371064662933, + "step": 910, + "valid_targets_mean": 4546.2, + "valid_targets_min": 2717 + }, + { + "epoch": 0.9546165884194053, + "grad_norm": 0.7570838826959169, + "learning_rate": 3.984182439020164e-05, + "loss": 0.3343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1720515340566635, + "step": 915, + "valid_targets_mean": 3169.8, + "valid_targets_min": 1720 + }, + { + "epoch": 0.9598330725091289, + "grad_norm": 0.6927860743428793, + "learning_rate": 3.9835229767560034e-05, + "loss": 0.3287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1746203601360321, + "step": 920, + "valid_targets_mean": 3068.8, + "valid_targets_min": 1278 + }, + { + "epoch": 0.9650495565988524, + "grad_norm": 0.6593308990191299, + "learning_rate": 3.982850103556351e-05, + "loss": 0.3109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15618298947811127, + "step": 925, + "valid_targets_mean": 2745.8, + "valid_targets_min": 1858 + }, + { + "epoch": 0.9702660406885759, + "grad_norm": 0.6190366478644613, + "learning_rate": 3.982163823970615e-05, + "loss": 0.3265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14582376182079315, + "step": 930, + "valid_targets_mean": 3250.5, + "valid_targets_min": 1254 + }, + { + "epoch": 0.9754825247782994, + "grad_norm": 0.6817005267213448, + "learning_rate": 3.98146414263885e-05, + "loss": 0.3209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1816140115261078, + "step": 935, + "valid_targets_mean": 2633.0, + "valid_targets_min": 1039 + }, + { + "epoch": 0.980699008868023, + "grad_norm": 0.559275652331853, + "learning_rate": 3.980751064291719e-05, + "loss": 0.3309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17203202843666077, + "step": 940, + "valid_targets_mean": 4942.8, + "valid_targets_min": 2734 + }, + { + "epoch": 0.9859154929577465, + "grad_norm": 0.643296499141871, + "learning_rate": 3.980024593750466e-05, + "loss": 0.3252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16650418937206268, + "step": 945, + "valid_targets_mean": 3410.4, + "valid_targets_min": 809 + }, + { + "epoch": 0.99113197704747, + "grad_norm": 0.5846004226556762, + "learning_rate": 3.979284735926882e-05, + "loss": 0.3237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14251956343650818, + "step": 950, + "valid_targets_mean": 3692.1, + "valid_targets_min": 1974 + }, + { + "epoch": 0.9963484611371936, + "grad_norm": 0.6324144227355625, + "learning_rate": 3.978531495823271e-05, + "loss": 0.3319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1782522350549698, + "step": 955, + "valid_targets_mean": 4170.4, + "valid_targets_min": 2179 + }, + { + "epoch": 1.0010432968179448, + "grad_norm": 0.5546599240595003, + "learning_rate": 3.977764878532418e-05, + "loss": 0.3005, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12547332048416138, + "step": 960, + "valid_targets_mean": 6935.0, + "valid_targets_min": 5295 + }, + { + "epoch": 1.0062597809076683, + "grad_norm": 0.4831819004713698, + "learning_rate": 3.976984889237551e-05, + "loss": 0.2361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13953888416290283, + "step": 965, + "valid_targets_mean": 7672.9, + "valid_targets_min": 5066 + }, + { + "epoch": 1.0114762649973918, + "grad_norm": 0.45643843715872456, + "learning_rate": 3.9761915332123113e-05, + "loss": 0.2295, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11715692281723022, + "step": 970, + "valid_targets_mean": 7347.6, + "valid_targets_min": 4923 + }, + { + "epoch": 1.0166927490871154, + "grad_norm": 0.4765167125677193, + "learning_rate": 3.975384815820713e-05, + "loss": 0.2274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10759155452251434, + "step": 975, + "valid_targets_mean": 5446.8, + "valid_targets_min": 3922 + }, + { + "epoch": 1.021909233176839, + "grad_norm": 0.42433029852709786, + "learning_rate": 3.974564742517109e-05, + "loss": 0.2223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10840212553739548, + "step": 980, + "valid_targets_mean": 6924.9, + "valid_targets_min": 4797 + }, + { + "epoch": 1.0271257172665624, + "grad_norm": 0.47250888684832437, + "learning_rate": 3.973731318846155e-05, + "loss": 0.2446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12191439419984818, + "step": 985, + "valid_targets_mean": 6443.5, + "valid_targets_min": 3853 + }, + { + "epoch": 1.032342201356286, + "grad_norm": 0.4912859295156451, + "learning_rate": 3.97288455044277e-05, + "loss": 0.2366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11636251211166382, + "step": 990, + "valid_targets_mean": 5446.5, + "valid_targets_min": 4487 + }, + { + "epoch": 1.0375586854460095, + "grad_norm": 0.48462608869637236, + "learning_rate": 3.972024443032098e-05, + "loss": 0.2194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11166022717952728, + "step": 995, + "valid_targets_mean": 6210.2, + "valid_targets_min": 4137 + }, + { + "epoch": 1.042775169535733, + "grad_norm": 0.6282282902665347, + "learning_rate": 3.971151002429471e-05, + "loss": 0.2591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20216041803359985, + "step": 1000, + "valid_targets_mean": 5404.4, + "valid_targets_min": 698 + }, + { + "epoch": 1.0479916536254565, + "grad_norm": 0.43989577818634695, + "learning_rate": 3.97026423454037e-05, + "loss": 0.2184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12884503602981567, + "step": 1005, + "valid_targets_mean": 7197.6, + "valid_targets_min": 4499 + }, + { + "epoch": 1.05320813771518, + "grad_norm": 0.5116692682096196, + "learning_rate": 3.969364145360383e-05, + "loss": 0.2302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15146520733833313, + "step": 1010, + "valid_targets_mean": 5538.4, + "valid_targets_min": 4291 + }, + { + "epoch": 1.0584246218049036, + "grad_norm": 0.44806166036878736, + "learning_rate": 3.9684507409751655e-05, + "loss": 0.2179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09325650334358215, + "step": 1015, + "valid_targets_mean": 5531.4, + "valid_targets_min": 4050 + }, + { + "epoch": 1.063641105894627, + "grad_norm": 0.4427566395087562, + "learning_rate": 3.967524027560401e-05, + "loss": 0.2281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12759213149547577, + "step": 1020, + "valid_targets_mean": 7526.6, + "valid_targets_min": 5196 + }, + { + "epoch": 1.0688575899843507, + "grad_norm": 0.4332092372043874, + "learning_rate": 3.966584011381753e-05, + "loss": 0.2095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10353688895702362, + "step": 1025, + "valid_targets_mean": 6248.9, + "valid_targets_min": 4749 + }, + { + "epoch": 1.074074074074074, + "grad_norm": 0.5063677418230649, + "learning_rate": 3.965630698794833e-05, + "loss": 0.2095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058588460087776184, + "step": 1030, + "valid_targets_mean": 3248.5, + "valid_targets_min": 1908 + }, + { + "epoch": 1.0792905581637977, + "grad_norm": 0.4544218820807293, + "learning_rate": 3.964664096245149e-05, + "loss": 0.2068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10457111150026321, + "step": 1035, + "valid_targets_mean": 6225.6, + "valid_targets_min": 4808 + }, + { + "epoch": 1.084507042253521, + "grad_norm": 0.4645748146301178, + "learning_rate": 3.9636842102680635e-05, + "loss": 0.2223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10878760367631912, + "step": 1040, + "valid_targets_mean": 5383.0, + "valid_targets_min": 3774 + }, + { + "epoch": 1.0897235263432448, + "grad_norm": 0.42950892503053273, + "learning_rate": 3.962691047488754e-05, + "loss": 0.2157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1102849692106247, + "step": 1045, + "valid_targets_mean": 6291.4, + "valid_targets_min": 5335 + }, + { + "epoch": 1.094940010432968, + "grad_norm": 0.3854435656380156, + "learning_rate": 3.9616846146221606e-05, + "loss": 0.1966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11142845451831818, + "step": 1050, + "valid_targets_mean": 7749.1, + "valid_targets_min": 6205 + }, + { + "epoch": 1.1001564945226918, + "grad_norm": 0.47685281365813437, + "learning_rate": 3.960664918472947e-05, + "loss": 0.2048, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10399685800075531, + "step": 1055, + "valid_targets_mean": 6882.1, + "valid_targets_min": 4428 + }, + { + "epoch": 1.1053729786124151, + "grad_norm": 0.5181199602052508, + "learning_rate": 3.959631965935453e-05, + "loss": 0.2264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1118919774889946, + "step": 1060, + "valid_targets_mean": 5751.8, + "valid_targets_min": 4458 + }, + { + "epoch": 1.1105894627021389, + "grad_norm": 0.48246509464215426, + "learning_rate": 3.958585763993646e-05, + "loss": 0.23, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09632065892219543, + "step": 1065, + "valid_targets_mean": 6003.9, + "valid_targets_min": 4458 + }, + { + "epoch": 1.1158059467918622, + "grad_norm": 0.4782313183987573, + "learning_rate": 3.9575263197210736e-05, + "loss": 0.2078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09873838722705841, + "step": 1070, + "valid_targets_mean": 6033.8, + "valid_targets_min": 4066 + }, + { + "epoch": 1.1210224308815857, + "grad_norm": 0.46133675146163483, + "learning_rate": 3.9564536402808194e-05, + "loss": 0.217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11760516464710236, + "step": 1075, + "valid_targets_mean": 6601.0, + "valid_targets_min": 4708 + }, + { + "epoch": 1.1262389149713092, + "grad_norm": 0.4995892153674224, + "learning_rate": 3.955367732925451e-05, + "loss": 0.2321, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12128092348575592, + "step": 1080, + "valid_targets_mean": 6092.1, + "valid_targets_min": 4789 + }, + { + "epoch": 1.131455399061033, + "grad_norm": 0.4295626650139439, + "learning_rate": 3.954268604996972e-05, + "loss": 0.2514, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12159417569637299, + "step": 1085, + "valid_targets_mean": 6474.6, + "valid_targets_min": 4224 + }, + { + "epoch": 1.1366718831507563, + "grad_norm": 0.46354996283977623, + "learning_rate": 3.953156263926773e-05, + "loss": 0.2403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11748499423265457, + "step": 1090, + "valid_targets_mean": 6312.4, + "valid_targets_min": 4617 + }, + { + "epoch": 1.14188836724048, + "grad_norm": 0.4582155762848208, + "learning_rate": 3.952030717235581e-05, + "loss": 0.2168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1118055209517479, + "step": 1095, + "valid_targets_mean": 6333.8, + "valid_targets_min": 4783 + }, + { + "epoch": 1.1471048513302033, + "grad_norm": 0.49772557162399456, + "learning_rate": 3.950891972533408e-05, + "loss": 0.2189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11112602055072784, + "step": 1100, + "valid_targets_mean": 7380.1, + "valid_targets_min": 6020 + }, + { + "epoch": 1.1523213354199269, + "grad_norm": 0.4837333990437587, + "learning_rate": 3.9497400375195e-05, + "loss": 0.2237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11588940024375916, + "step": 1105, + "valid_targets_mean": 6009.0, + "valid_targets_min": 3049 + }, + { + "epoch": 1.1575378195096504, + "grad_norm": 0.4689430403962076, + "learning_rate": 3.948574919982286e-05, + "loss": 0.2203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10131049901247025, + "step": 1110, + "valid_targets_mean": 5973.1, + "valid_targets_min": 4822 + }, + { + "epoch": 1.162754303599374, + "grad_norm": 0.48357870999800145, + "learning_rate": 3.947396627799322e-05, + "loss": 0.219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10432552546262741, + "step": 1115, + "valid_targets_mean": 5754.9, + "valid_targets_min": 2910 + }, + { + "epoch": 1.1679707876890975, + "grad_norm": 0.48533457473699715, + "learning_rate": 3.946205168937243e-05, + "loss": 0.2273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1246715784072876, + "step": 1120, + "valid_targets_mean": 6414.6, + "valid_targets_min": 4791 + }, + { + "epoch": 1.173187271778821, + "grad_norm": 0.4864678118581872, + "learning_rate": 3.945000551451703e-05, + "loss": 0.22, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11191219091415405, + "step": 1125, + "valid_targets_mean": 5675.9, + "valid_targets_min": 3983 + }, + { + "epoch": 1.1784037558685445, + "grad_norm": 0.400215764448564, + "learning_rate": 3.9437827834873265e-05, + "loss": 0.201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08970002830028534, + "step": 1130, + "valid_targets_mean": 6975.5, + "valid_targets_min": 4383 + }, + { + "epoch": 1.183620239958268, + "grad_norm": 0.5186590911761401, + "learning_rate": 3.942551873277649e-05, + "loss": 0.1926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.104623943567276, + "step": 1135, + "valid_targets_mean": 6417.1, + "valid_targets_min": 3182 + }, + { + "epoch": 1.1888367240479916, + "grad_norm": 0.4871344500385087, + "learning_rate": 3.941307829145063e-05, + "loss": 0.2251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1189800277352333, + "step": 1140, + "valid_targets_mean": 6218.6, + "valid_targets_min": 5077 + }, + { + "epoch": 1.194053208137715, + "grad_norm": 0.579589850103938, + "learning_rate": 3.9400506595007624e-05, + "loss": 0.2234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11050371825695038, + "step": 1145, + "valid_targets_mean": 4774.9, + "valid_targets_min": 3663 + }, + { + "epoch": 1.1992696922274386, + "grad_norm": 0.48511777487777324, + "learning_rate": 3.938780372844685e-05, + "loss": 0.2117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11199572682380676, + "step": 1150, + "valid_targets_mean": 6316.8, + "valid_targets_min": 4105 + }, + { + "epoch": 1.2044861763171621, + "grad_norm": 0.4219073995641171, + "learning_rate": 3.9374969777654535e-05, + "loss": 0.2129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09767965972423553, + "step": 1155, + "valid_targets_mean": 6528.9, + "valid_targets_min": 4537 + }, + { + "epoch": 1.2097026604068857, + "grad_norm": 0.4260227101202468, + "learning_rate": 3.93620048294032e-05, + "loss": 0.1994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09904081374406815, + "step": 1160, + "valid_targets_mean": 6416.8, + "valid_targets_min": 2494 + }, + { + "epoch": 1.2149191444966092, + "grad_norm": 0.46022445681444774, + "learning_rate": 3.9348908971351065e-05, + "loss": 0.2152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1129528284072876, + "step": 1165, + "valid_targets_mean": 6070.9, + "valid_targets_min": 4921 + }, + { + "epoch": 1.2201356285863327, + "grad_norm": 0.5370437541810903, + "learning_rate": 3.933568229204145e-05, + "loss": 0.218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11515654623508453, + "step": 1170, + "valid_targets_mean": 6701.4, + "valid_targets_min": 5285 + }, + { + "epoch": 1.2253521126760563, + "grad_norm": 0.43471505752232553, + "learning_rate": 3.9322324880902177e-05, + "loss": 0.204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09950380772352219, + "step": 1175, + "valid_targets_mean": 6273.9, + "valid_targets_min": 4464 + }, + { + "epoch": 1.2305685967657798, + "grad_norm": 0.49721936380687, + "learning_rate": 3.930883682824496e-05, + "loss": 0.2246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09697577357292175, + "step": 1180, + "valid_targets_mean": 5554.8, + "valid_targets_min": 4439 + }, + { + "epoch": 1.2357850808555033, + "grad_norm": 0.39773448374121023, + "learning_rate": 3.929521822526483e-05, + "loss": 0.1995, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08901071548461914, + "step": 1185, + "valid_targets_mean": 6043.0, + "valid_targets_min": 4686 + }, + { + "epoch": 1.2410015649452268, + "grad_norm": 0.4243970765973913, + "learning_rate": 3.928146916403946e-05, + "loss": 0.1847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09092367440462112, + "step": 1190, + "valid_targets_mean": 5912.5, + "valid_targets_min": 3381 + }, + { + "epoch": 1.2462180490349504, + "grad_norm": 0.43565332114886907, + "learning_rate": 3.926758973752859e-05, + "loss": 0.1884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09724549949169159, + "step": 1195, + "valid_targets_mean": 6495.5, + "valid_targets_min": 5309 + }, + { + "epoch": 1.251434533124674, + "grad_norm": 0.5144893819356356, + "learning_rate": 3.925358003957338e-05, + "loss": 0.1945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10769973695278168, + "step": 1200, + "valid_targets_mean": 6483.9, + "valid_targets_min": 5161 + }, + { + "epoch": 1.2566510172143974, + "grad_norm": 0.47013238082881725, + "learning_rate": 3.923944016489578e-05, + "loss": 0.2129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10393393784761429, + "step": 1205, + "valid_targets_mean": 5958.9, + "valid_targets_min": 4666 + }, + { + "epoch": 1.261867501304121, + "grad_norm": 0.7104265550828432, + "learning_rate": 3.9225170209097865e-05, + "loss": 0.2433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12710750102996826, + "step": 1210, + "valid_targets_mean": 1691.5, + "valid_targets_min": 1440 + }, + { + "epoch": 1.2670839853938445, + "grad_norm": 0.45662504852022473, + "learning_rate": 3.921077026866125e-05, + "loss": 0.2279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09339067339897156, + "step": 1215, + "valid_targets_mean": 5299.1, + "valid_targets_min": 4547 + }, + { + "epoch": 1.272300469483568, + "grad_norm": 0.5175889929118312, + "learning_rate": 3.919624044094636e-05, + "loss": 0.2023, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10674049705266953, + "step": 1220, + "valid_targets_mean": 6003.5, + "valid_targets_min": 5031 + }, + { + "epoch": 1.2775169535732915, + "grad_norm": 0.4144725978526942, + "learning_rate": 3.918158082419184e-05, + "loss": 0.2118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09701692312955856, + "step": 1225, + "valid_targets_mean": 8169.6, + "valid_targets_min": 5487 + }, + { + "epoch": 1.282733437663015, + "grad_norm": 0.5001870704967278, + "learning_rate": 3.916679151751383e-05, + "loss": 0.2148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10032361000776291, + "step": 1230, + "valid_targets_mean": 6474.9, + "valid_targets_min": 4253 + }, + { + "epoch": 1.2879499217527386, + "grad_norm": 0.4439227822222747, + "learning_rate": 3.9151872620905356e-05, + "loss": 0.2262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12093880027532578, + "step": 1235, + "valid_targets_mean": 7021.0, + "valid_targets_min": 4990 + }, + { + "epoch": 1.2931664058424621, + "grad_norm": 0.4605872823351688, + "learning_rate": 3.913682423523561e-05, + "loss": 0.2081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12573207914829254, + "step": 1240, + "valid_targets_mean": 6356.0, + "valid_targets_min": 4385 + }, + { + "epoch": 1.2983828899321856, + "grad_norm": 0.5437484136106919, + "learning_rate": 3.912164646224927e-05, + "loss": 0.2145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11156106740236282, + "step": 1245, + "valid_targets_mean": 6815.1, + "valid_targets_min": 5164 + }, + { + "epoch": 1.3035993740219092, + "grad_norm": 0.43227512743781676, + "learning_rate": 3.910633940456585e-05, + "loss": 0.2035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07962863147258759, + "step": 1250, + "valid_targets_mean": 5588.5, + "valid_targets_min": 4098 + }, + { + "epoch": 1.3088158581116327, + "grad_norm": 0.4510479181391869, + "learning_rate": 3.909090316567896e-05, + "loss": 0.1938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10759300738573074, + "step": 1255, + "valid_targets_mean": 6167.6, + "valid_targets_min": 4801 + }, + { + "epoch": 1.3140323422013562, + "grad_norm": 0.537328028108305, + "learning_rate": 3.9075337849955614e-05, + "loss": 0.2318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2240067720413208, + "step": 1260, + "valid_targets_mean": 6224.9, + "valid_targets_min": 4772 + }, + { + "epoch": 1.3192488262910798, + "grad_norm": 0.5564243349138817, + "learning_rate": 3.905964356263558e-05, + "loss": 0.2296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11779792606830597, + "step": 1265, + "valid_targets_mean": 6549.2, + "valid_targets_min": 5025 + }, + { + "epoch": 1.3244653103808033, + "grad_norm": 0.45354229858109685, + "learning_rate": 3.9043820409830585e-05, + "loss": 0.1963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11518187820911407, + "step": 1270, + "valid_targets_mean": 6680.8, + "valid_targets_min": 4818 + }, + { + "epoch": 1.3296817944705268, + "grad_norm": 0.44993536132461515, + "learning_rate": 3.9027868498523645e-05, + "loss": 0.2121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1047646701335907, + "step": 1275, + "valid_targets_mean": 5835.1, + "valid_targets_min": 4111 + }, + { + "epoch": 1.3348982785602503, + "grad_norm": 0.4262438556658041, + "learning_rate": 3.901178793656836e-05, + "loss": 0.2131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10000675916671753, + "step": 1280, + "valid_targets_mean": 7029.9, + "valid_targets_min": 2275 + }, + { + "epoch": 1.3401147626499739, + "grad_norm": 0.4409835117621475, + "learning_rate": 3.899557883268811e-05, + "loss": 0.1882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0899777039885521, + "step": 1285, + "valid_targets_mean": 5621.2, + "valid_targets_min": 4826 + }, + { + "epoch": 1.3453312467396974, + "grad_norm": 0.4128493662830254, + "learning_rate": 3.897924129647542e-05, + "loss": 0.1999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0923563688993454, + "step": 1290, + "valid_targets_mean": 6858.8, + "valid_targets_min": 4710 + }, + { + "epoch": 1.350547730829421, + "grad_norm": 0.40747885027196773, + "learning_rate": 3.896277543839114e-05, + "loss": 0.1875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13192376494407654, + "step": 1295, + "valid_targets_mean": 7012.5, + "valid_targets_min": 5421 + }, + { + "epoch": 1.3557642149191445, + "grad_norm": 0.39497838720510486, + "learning_rate": 3.894618136976372e-05, + "loss": 0.1964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09392920881509781, + "step": 1300, + "valid_targets_mean": 6967.6, + "valid_targets_min": 4949 + }, + { + "epoch": 1.360980699008868, + "grad_norm": 0.43385323695065775, + "learning_rate": 3.89294592027885e-05, + "loss": 0.194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10152305662631989, + "step": 1305, + "valid_targets_mean": 6348.5, + "valid_targets_min": 4831 + }, + { + "epoch": 1.3661971830985915, + "grad_norm": 0.4255364237482966, + "learning_rate": 3.891260905052685e-05, + "loss": 0.2038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1257421225309372, + "step": 1310, + "valid_targets_mean": 6597.2, + "valid_targets_min": 4018 + }, + { + "epoch": 1.371413667188315, + "grad_norm": 0.37668833077015024, + "learning_rate": 3.8895631026905546e-05, + "loss": 0.1924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07407970726490021, + "step": 1315, + "valid_targets_mean": 6420.9, + "valid_targets_min": 5034 + }, + { + "epoch": 1.3766301512780386, + "grad_norm": 0.43292554499596303, + "learning_rate": 3.887852524671587e-05, + "loss": 0.1997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1164761409163475, + "step": 1320, + "valid_targets_mean": 6962.4, + "valid_targets_min": 5442 + }, + { + "epoch": 1.381846635367762, + "grad_norm": 0.4691120581364764, + "learning_rate": 3.886129182561291e-05, + "loss": 0.2128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10786296427249908, + "step": 1325, + "valid_targets_mean": 5610.9, + "valid_targets_min": 4609 + }, + { + "epoch": 1.3870631194574856, + "grad_norm": 0.4956552612064659, + "learning_rate": 3.8843930880114745e-05, + "loss": 0.2185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1057450920343399, + "step": 1330, + "valid_targets_mean": 5038.4, + "valid_targets_min": 2301 + }, + { + "epoch": 1.3922796035472091, + "grad_norm": 0.5187688347085647, + "learning_rate": 3.882644252760168e-05, + "loss": 0.2825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14259420335292816, + "step": 1335, + "valid_targets_mean": 4602.4, + "valid_targets_min": 702 + }, + { + "epoch": 1.3974960876369327, + "grad_norm": 0.6054825605226738, + "learning_rate": 3.8808826886315426e-05, + "loss": 0.3194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1926257312297821, + "step": 1340, + "valid_targets_mean": 4814.2, + "valid_targets_min": 1005 + }, + { + "epoch": 1.4027125717266562, + "grad_norm": 0.4912729177659304, + "learning_rate": 3.8791084075358344e-05, + "loss": 0.3041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12406802177429199, + "step": 1345, + "valid_targets_mean": 4273.4, + "valid_targets_min": 2054 + }, + { + "epoch": 1.4079290558163797, + "grad_norm": 0.5679208676528138, + "learning_rate": 3.8773214214692564e-05, + "loss": 0.3055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11698244512081146, + "step": 1350, + "valid_targets_mean": 3367.4, + "valid_targets_min": 1561 + }, + { + "epoch": 1.4131455399061033, + "grad_norm": 0.5587002511027908, + "learning_rate": 3.8755217425139264e-05, + "loss": 0.3034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18428149819374084, + "step": 1355, + "valid_targets_mean": 5055.8, + "valid_targets_min": 1970 + }, + { + "epoch": 1.4183620239958268, + "grad_norm": 0.45926497231707597, + "learning_rate": 3.8737093828377785e-05, + "loss": 0.3158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16836829483509064, + "step": 1360, + "valid_targets_mean": 6716.9, + "valid_targets_min": 1651 + }, + { + "epoch": 1.4235785080855503, + "grad_norm": 0.5370771307706959, + "learning_rate": 3.8718843546944844e-05, + "loss": 0.3007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1585818976163864, + "step": 1365, + "valid_targets_mean": 6468.5, + "valid_targets_min": 2231 + }, + { + "epoch": 1.4287949921752738, + "grad_norm": 0.513696489883202, + "learning_rate": 3.870046670423369e-05, + "loss": 0.3032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12524789571762085, + "step": 1370, + "valid_targets_mean": 3683.6, + "valid_targets_min": 467 + }, + { + "epoch": 1.4340114762649974, + "grad_norm": 0.5154700412041995, + "learning_rate": 3.868196342449327e-05, + "loss": 0.3092, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17861352860927582, + "step": 1375, + "valid_targets_mean": 6501.8, + "valid_targets_min": 1748 + }, + { + "epoch": 1.439227960354721, + "grad_norm": 0.5687543201652565, + "learning_rate": 3.8663333832827415e-05, + "loss": 0.311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15118902921676636, + "step": 1380, + "valid_targets_mean": 4381.8, + "valid_targets_min": 1697 + }, + { + "epoch": 1.4444444444444444, + "grad_norm": 0.658495726413035, + "learning_rate": 3.864457805519395e-05, + "loss": 0.3091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14276103675365448, + "step": 1385, + "valid_targets_mean": 2841.9, + "valid_targets_min": 1223 + }, + { + "epoch": 1.449660928534168, + "grad_norm": 0.4817171354105761, + "learning_rate": 3.862569621840385e-05, + "loss": 0.3029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15693464875221252, + "step": 1390, + "valid_targets_mean": 5914.2, + "valid_targets_min": 1552 + }, + { + "epoch": 1.4548774126238915, + "grad_norm": 0.5454815226244016, + "learning_rate": 3.860668845012044e-05, + "loss": 0.3066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14202246069908142, + "step": 1395, + "valid_targets_mean": 3926.2, + "valid_targets_min": 1546 + }, + { + "epoch": 1.460093896713615, + "grad_norm": 0.6047738618482181, + "learning_rate": 3.8587554878858466e-05, + "loss": 0.3038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16854584217071533, + "step": 1400, + "valid_targets_mean": 4878.8, + "valid_targets_min": 2362 + }, + { + "epoch": 1.4653103808033385, + "grad_norm": 0.580145735955046, + "learning_rate": 3.8568295633983244e-05, + "loss": 0.3154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14221327006816864, + "step": 1405, + "valid_targets_mean": 3567.4, + "valid_targets_min": 1538 + }, + { + "epoch": 1.470526864893062, + "grad_norm": 0.6316689464465992, + "learning_rate": 3.85489108457098e-05, + "loss": 0.3127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15490871667861938, + "step": 1410, + "valid_targets_mean": 3454.9, + "valid_targets_min": 1967 + }, + { + "epoch": 1.4757433489827856, + "grad_norm": 0.6575634819905788, + "learning_rate": 3.8529400645101984e-05, + "loss": 0.3085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1370844691991806, + "step": 1415, + "valid_targets_mean": 2960.2, + "valid_targets_min": 935 + }, + { + "epoch": 1.4809598330725091, + "grad_norm": 1.3647444735978609, + "learning_rate": 3.850976516407157e-05, + "loss": 0.3204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16425952315330505, + "step": 1420, + "valid_targets_mean": 3945.8, + "valid_targets_min": 2704 + }, + { + "epoch": 1.4861763171622326, + "grad_norm": 0.6864310683296885, + "learning_rate": 3.8490004535377356e-05, + "loss": 0.3184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13381925225257874, + "step": 1425, + "valid_targets_mean": 2315.9, + "valid_targets_min": 1173 + }, + { + "epoch": 1.4913928012519562, + "grad_norm": 0.5968674206185763, + "learning_rate": 3.8470118892624345e-05, + "loss": 0.3124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14967967569828033, + "step": 1430, + "valid_targets_mean": 4287.9, + "valid_targets_min": 1670 + }, + { + "epoch": 1.4966092853416797, + "grad_norm": 0.6128222238973604, + "learning_rate": 3.8450108370262714e-05, + "loss": 0.3134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1425648182630539, + "step": 1435, + "valid_targets_mean": 3211.1, + "valid_targets_min": 1329 + }, + { + "epoch": 1.5018257694314032, + "grad_norm": 0.6594663759123439, + "learning_rate": 3.8429973103587016e-05, + "loss": 0.3038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16858342289924622, + "step": 1440, + "valid_targets_mean": 3514.5, + "valid_targets_min": 2106 + }, + { + "epoch": 1.5070422535211268, + "grad_norm": 0.6112879207018653, + "learning_rate": 3.840971322873519e-05, + "loss": 0.2845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13176248967647552, + "step": 1445, + "valid_targets_mean": 4602.6, + "valid_targets_min": 2222 + }, + { + "epoch": 1.5122587376108503, + "grad_norm": 0.6174069293084833, + "learning_rate": 3.838932888268771e-05, + "loss": 0.3082, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14681881666183472, + "step": 1450, + "valid_targets_mean": 3499.4, + "valid_targets_min": 1565 + }, + { + "epoch": 1.5174752217005738, + "grad_norm": 0.5659859921892517, + "learning_rate": 3.836882020326658e-05, + "loss": 0.307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14624708890914917, + "step": 1455, + "valid_targets_mean": 3692.0, + "valid_targets_min": 1951 + }, + { + "epoch": 1.5226917057902973, + "grad_norm": 0.5506475990930388, + "learning_rate": 3.834818732913448e-05, + "loss": 0.3153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16028809547424316, + "step": 1460, + "valid_targets_mean": 4503.1, + "valid_targets_min": 2450 + }, + { + "epoch": 1.5279081898800209, + "grad_norm": 0.6682228626257847, + "learning_rate": 3.8327430399793754e-05, + "loss": 0.2927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13012999296188354, + "step": 1465, + "valid_targets_mean": 2662.9, + "valid_targets_min": 997 + }, + { + "epoch": 1.5331246739697444, + "grad_norm": 0.5673286941776295, + "learning_rate": 3.8306549555585536e-05, + "loss": 0.2946, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1437559425830841, + "step": 1470, + "valid_targets_mean": 4197.1, + "valid_targets_min": 3181 + }, + { + "epoch": 1.538341158059468, + "grad_norm": 0.6157995353226661, + "learning_rate": 3.828554493768876e-05, + "loss": 0.3103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1533483862876892, + "step": 1475, + "valid_targets_mean": 3967.5, + "valid_targets_min": 863 + }, + { + "epoch": 1.5435576421491914, + "grad_norm": 0.6583345780847943, + "learning_rate": 3.826441668811921e-05, + "loss": 0.2996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13885629177093506, + "step": 1480, + "valid_targets_mean": 3194.8, + "valid_targets_min": 1410 + }, + { + "epoch": 1.548774126238915, + "grad_norm": 0.5556991881081477, + "learning_rate": 3.8243164949728565e-05, + "loss": 0.2899, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13212761282920837, + "step": 1485, + "valid_targets_mean": 4585.5, + "valid_targets_min": 2110 + }, + { + "epoch": 1.5539906103286385, + "grad_norm": 0.633918883393111, + "learning_rate": 3.8221789866203434e-05, + "loss": 0.3049, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12798035144805908, + "step": 1490, + "valid_targets_mean": 3022.8, + "valid_targets_min": 1318 + }, + { + "epoch": 1.559207094418362, + "grad_norm": 0.5917227324756469, + "learning_rate": 3.820029158206438e-05, + "loss": 0.3048, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1435876488685608, + "step": 1495, + "valid_targets_mean": 3901.4, + "valid_targets_min": 1876 + }, + { + "epoch": 1.5644235785080856, + "grad_norm": 0.6207260779906152, + "learning_rate": 3.817867024266497e-05, + "loss": 0.2961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1703071892261505, + "step": 1500, + "valid_targets_mean": 3791.0, + "valid_targets_min": 2605 + }, + { + "epoch": 1.569640062597809, + "grad_norm": 0.5972920470875116, + "learning_rate": 3.8156925994190735e-05, + "loss": 0.3086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13556455075740814, + "step": 1505, + "valid_targets_mean": 3304.4, + "valid_targets_min": 2018 + }, + { + "epoch": 1.5748565466875326, + "grad_norm": 0.5859302369989753, + "learning_rate": 3.813505898365822e-05, + "loss": 0.3062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14627033472061157, + "step": 1510, + "valid_targets_mean": 3980.9, + "valid_targets_min": 1714 + }, + { + "epoch": 1.5800730307772561, + "grad_norm": 0.5169729265415501, + "learning_rate": 3.8113069358914024e-05, + "loss": 0.3002, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16340120136737823, + "step": 1515, + "valid_targets_mean": 5640.4, + "valid_targets_min": 2033 + }, + { + "epoch": 1.5852895148669797, + "grad_norm": 0.618250703006501, + "learning_rate": 3.80909572686337e-05, + "loss": 0.2934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1767168641090393, + "step": 1520, + "valid_targets_mean": 3889.9, + "valid_targets_min": 1924 + }, + { + "epoch": 1.5905059989567032, + "grad_norm": 0.6670817430101071, + "learning_rate": 3.806872286232086e-05, + "loss": 0.3004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14657537639141083, + "step": 1525, + "valid_targets_mean": 3436.1, + "valid_targets_min": 2098 + }, + { + "epoch": 1.5957224830464267, + "grad_norm": 0.7303194758012859, + "learning_rate": 3.80463662903061e-05, + "loss": 0.3061, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22889497876167297, + "step": 1530, + "valid_targets_mean": 4295.0, + "valid_targets_min": 1363 + }, + { + "epoch": 1.6009389671361502, + "grad_norm": 0.6485588551989856, + "learning_rate": 3.802388770374598e-05, + "loss": 0.2918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16088950634002686, + "step": 1535, + "valid_targets_mean": 3449.4, + "valid_targets_min": 1087 + }, + { + "epoch": 1.6061554512258738, + "grad_norm": 0.608378082293793, + "learning_rate": 3.8001287254622064e-05, + "loss": 0.3068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17304860055446625, + "step": 1540, + "valid_targets_mean": 4451.1, + "valid_targets_min": 2483 + }, + { + "epoch": 1.6113719353155973, + "grad_norm": 0.594001555879516, + "learning_rate": 3.797856509573981e-05, + "loss": 0.2975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15198184549808502, + "step": 1545, + "valid_targets_mean": 3770.6, + "valid_targets_min": 2316 + }, + { + "epoch": 1.6165884194053208, + "grad_norm": 0.5922257593467875, + "learning_rate": 3.795572138072759e-05, + "loss": 0.2938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1441056728363037, + "step": 1550, + "valid_targets_mean": 3776.6, + "valid_targets_min": 2022 + }, + { + "epoch": 1.6218049034950444, + "grad_norm": 0.6217413743365142, + "learning_rate": 3.793275626403564e-05, + "loss": 0.2854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10389462113380432, + "step": 1555, + "valid_targets_mean": 2919.2, + "valid_targets_min": 1294 + }, + { + "epoch": 1.6270213875847679, + "grad_norm": 0.6624146905948954, + "learning_rate": 3.790966990093503e-05, + "loss": 0.3033, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1572236269712448, + "step": 1560, + "valid_targets_mean": 3461.2, + "valid_targets_min": 1700 + }, + { + "epoch": 1.6322378716744914, + "grad_norm": 0.6700285011435937, + "learning_rate": 3.788646244751654e-05, + "loss": 0.2929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1439102292060852, + "step": 1565, + "valid_targets_mean": 2870.9, + "valid_targets_min": 1586 + }, + { + "epoch": 1.637454355764215, + "grad_norm": 0.69548840181902, + "learning_rate": 3.7863134060689734e-05, + "loss": 0.2986, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16934987902641296, + "step": 1570, + "valid_targets_mean": 3219.4, + "valid_targets_min": 1358 + }, + { + "epoch": 1.6426708398539385, + "grad_norm": 0.6720081144194161, + "learning_rate": 3.783968489818179e-05, + "loss": 0.3034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15724194049835205, + "step": 1575, + "valid_targets_mean": 3983.4, + "valid_targets_min": 2290 + }, + { + "epoch": 1.647887323943662, + "grad_norm": 0.5763564972903934, + "learning_rate": 3.781611511853646e-05, + "loss": 0.2995, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1925354152917862, + "step": 1580, + "valid_targets_mean": 5156.4, + "valid_targets_min": 2189 + }, + { + "epoch": 1.6531038080333855, + "grad_norm": 0.5989198901402524, + "learning_rate": 3.779242488111304e-05, + "loss": 0.296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1802498698234558, + "step": 1585, + "valid_targets_mean": 4121.2, + "valid_targets_min": 2121 + }, + { + "epoch": 1.658320292123109, + "grad_norm": 0.6103792343493881, + "learning_rate": 3.776861434608524e-05, + "loss": 0.3055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2017747461795807, + "step": 1590, + "valid_targets_mean": 4899.4, + "valid_targets_min": 2011 + }, + { + "epoch": 1.6635367762128326, + "grad_norm": 0.5891826184490707, + "learning_rate": 3.774468367444012e-05, + "loss": 0.2862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12466123700141907, + "step": 1595, + "valid_targets_mean": 3752.9, + "valid_targets_min": 2674 + }, + { + "epoch": 1.668753260302556, + "grad_norm": 0.603441297814451, + "learning_rate": 3.7720633027977034e-05, + "loss": 0.2808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1694631427526474, + "step": 1600, + "valid_targets_mean": 3976.4, + "valid_targets_min": 2266 + }, + { + "epoch": 1.6739697443922796, + "grad_norm": 0.5426372283845846, + "learning_rate": 3.7696462569306467e-05, + "loss": 0.2848, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13305452466011047, + "step": 1605, + "valid_targets_mean": 4403.5, + "valid_targets_min": 904 + }, + { + "epoch": 1.6791862284820032, + "grad_norm": 0.6759673666699528, + "learning_rate": 3.7672172461849e-05, + "loss": 0.2908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20155087113380432, + "step": 1610, + "valid_targets_mean": 4388.2, + "valid_targets_min": 1219 + }, + { + "epoch": 1.6844027125717267, + "grad_norm": 0.5860795835967149, + "learning_rate": 3.7647762869834164e-05, + "loss": 0.2951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12755990028381348, + "step": 1615, + "valid_targets_mean": 3888.8, + "valid_targets_min": 1658 + }, + { + "epoch": 1.6896191966614502, + "grad_norm": 0.688786002770473, + "learning_rate": 3.7623233958299364e-05, + "loss": 0.2942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13528330624103546, + "step": 1620, + "valid_targets_mean": 3065.0, + "valid_targets_min": 1423 + }, + { + "epoch": 1.6948356807511737, + "grad_norm": 0.6238202448889447, + "learning_rate": 3.7598585893088726e-05, + "loss": 0.2908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12375392764806747, + "step": 1625, + "valid_targets_mean": 3947.2, + "valid_targets_min": 2320 + }, + { + "epoch": 1.7000521648408973, + "grad_norm": 0.597061686721246, + "learning_rate": 3.7573818840852004e-05, + "loss": 0.2979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1421426385641098, + "step": 1630, + "valid_targets_mean": 3820.2, + "valid_targets_min": 1811 + }, + { + "epoch": 1.7052686489306208, + "grad_norm": 0.6300122509657887, + "learning_rate": 3.754893296904344e-05, + "loss": 0.2991, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13351641595363617, + "step": 1635, + "valid_targets_mean": 3953.8, + "valid_targets_min": 952 + }, + { + "epoch": 1.7104851330203443, + "grad_norm": 0.6880335658375903, + "learning_rate": 3.752392844592064e-05, + "loss": 0.2946, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13405336439609528, + "step": 1640, + "valid_targets_mean": 2668.1, + "valid_targets_min": 1364 + }, + { + "epoch": 1.7157016171100679, + "grad_norm": 0.7459623933331333, + "learning_rate": 3.7498805440543436e-05, + "loss": 0.2856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15350404381752014, + "step": 1645, + "valid_targets_mean": 3530.9, + "valid_targets_min": 1397 + }, + { + "epoch": 1.7209181011997914, + "grad_norm": 0.6514859568799165, + "learning_rate": 3.747356412277272e-05, + "loss": 0.279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14739996194839478, + "step": 1650, + "valid_targets_mean": 4327.6, + "valid_targets_min": 1415 + }, + { + "epoch": 1.726134585289515, + "grad_norm": 0.5729808826900179, + "learning_rate": 3.744820466326933e-05, + "loss": 0.287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.151767760515213, + "step": 1655, + "valid_targets_mean": 3737.0, + "valid_targets_min": 621 + }, + { + "epoch": 1.7313510693792384, + "grad_norm": 0.6919498654026515, + "learning_rate": 3.7422727233492876e-05, + "loss": 0.2773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11514987051486969, + "step": 1660, + "valid_targets_mean": 3610.9, + "valid_targets_min": 1290 + }, + { + "epoch": 1.736567553468962, + "grad_norm": 0.6941672218905252, + "learning_rate": 3.739713200570058e-05, + "loss": 0.2781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1195855587720871, + "step": 1665, + "valid_targets_mean": 2158.9, + "valid_targets_min": 642 + }, + { + "epoch": 1.7417840375586855, + "grad_norm": 0.7001433778715204, + "learning_rate": 3.737141915294612e-05, + "loss": 0.2771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15000997483730316, + "step": 1670, + "valid_targets_mean": 3336.2, + "valid_targets_min": 2109 + }, + { + "epoch": 1.747000521648409, + "grad_norm": 0.6593745184658015, + "learning_rate": 3.734558884907847e-05, + "loss": 0.2884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18376824259757996, + "step": 1675, + "valid_targets_mean": 4459.6, + "valid_targets_min": 2590 + }, + { + "epoch": 1.7522170057381325, + "grad_norm": 0.6051772779279558, + "learning_rate": 3.7319641268740684e-05, + "loss": 0.306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12143544852733612, + "step": 1680, + "valid_targets_mean": 3302.4, + "valid_targets_min": 1510 + }, + { + "epoch": 1.757433489827856, + "grad_norm": 0.7488373124394101, + "learning_rate": 3.729357658736877e-05, + "loss": 0.2863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1756652295589447, + "step": 1685, + "valid_targets_mean": 3270.5, + "valid_targets_min": 987 + }, + { + "epoch": 1.7626499739175796, + "grad_norm": 0.5845976260629392, + "learning_rate": 3.7267394981190456e-05, + "loss": 0.2921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1425667107105255, + "step": 1690, + "valid_targets_mean": 3920.6, + "valid_targets_min": 1609 + }, + { + "epoch": 1.7678664580073031, + "grad_norm": 0.5779612670816169, + "learning_rate": 3.724109662722402e-05, + "loss": 0.2809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1237059235572815, + "step": 1695, + "valid_targets_mean": 3647.4, + "valid_targets_min": 1700 + }, + { + "epoch": 1.7730829420970267, + "grad_norm": 0.4868511782722327, + "learning_rate": 3.72146817032771e-05, + "loss": 0.2828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1257198303937912, + "step": 1700, + "valid_targets_mean": 5263.6, + "valid_targets_min": 1767 + }, + { + "epoch": 1.77829942618675, + "grad_norm": 0.5150911159813736, + "learning_rate": 3.718815038794549e-05, + "loss": 0.2862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10907316207885742, + "step": 1705, + "valid_targets_mean": 3840.6, + "valid_targets_min": 1511 + }, + { + "epoch": 1.7835159102764737, + "grad_norm": 0.6785522941137018, + "learning_rate": 3.71615028606119e-05, + "loss": 0.2793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10846004635095596, + "step": 1710, + "valid_targets_mean": 2503.0, + "valid_targets_min": 974 + }, + { + "epoch": 1.788732394366197, + "grad_norm": 0.5780323542319622, + "learning_rate": 3.713473930144479e-05, + "loss": 0.2892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11437453329563141, + "step": 1715, + "valid_targets_mean": 3380.5, + "valid_targets_min": 1476 + }, + { + "epoch": 1.7939488784559208, + "grad_norm": 0.5942686361141056, + "learning_rate": 3.710785989139713e-05, + "loss": 0.2836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14181846380233765, + "step": 1720, + "valid_targets_mean": 3563.6, + "valid_targets_min": 1249 + }, + { + "epoch": 1.799165362545644, + "grad_norm": 0.5846931876203432, + "learning_rate": 3.7080864812205176e-05, + "loss": 0.2938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1274687498807907, + "step": 1725, + "valid_targets_mean": 3249.8, + "valid_targets_min": 1669 + }, + { + "epoch": 1.8043818466353678, + "grad_norm": 0.6948673007435581, + "learning_rate": 3.705375424638723e-05, + "loss": 0.3029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14867034554481506, + "step": 1730, + "valid_targets_mean": 2861.2, + "valid_targets_min": 1318 + }, + { + "epoch": 1.8095983307250911, + "grad_norm": 0.7342173352643555, + "learning_rate": 3.702652837724244e-05, + "loss": 0.2915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15044555068016052, + "step": 1735, + "valid_targets_mean": 2206.4, + "valid_targets_min": 1002 + }, + { + "epoch": 1.8148148148148149, + "grad_norm": 0.6502303690998679, + "learning_rate": 3.6999187388849517e-05, + "loss": 0.3122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1127261072397232, + "step": 1740, + "valid_targets_mean": 2393.9, + "valid_targets_min": 845 + }, + { + "epoch": 1.8200312989045382, + "grad_norm": 0.5397777566028302, + "learning_rate": 3.697173146606553e-05, + "loss": 0.2821, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15083134174346924, + "step": 1745, + "valid_targets_mean": 4607.1, + "valid_targets_min": 2611 + }, + { + "epoch": 1.825247782994262, + "grad_norm": 0.6007625193688588, + "learning_rate": 3.694416079452463e-05, + "loss": 0.3046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13365799188613892, + "step": 1750, + "valid_targets_mean": 3470.9, + "valid_targets_min": 1241 + }, + { + "epoch": 1.8304642670839852, + "grad_norm": 0.6677806434463449, + "learning_rate": 3.6916475560636806e-05, + "loss": 0.3054, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17650499939918518, + "step": 1755, + "valid_targets_mean": 3474.2, + "valid_targets_min": 992 + }, + { + "epoch": 1.835680751173709, + "grad_norm": 0.613767474260724, + "learning_rate": 3.688867595158663e-05, + "loss": 0.291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13226228952407837, + "step": 1760, + "valid_targets_mean": 3450.8, + "valid_targets_min": 1133 + }, + { + "epoch": 1.8408972352634323, + "grad_norm": 0.6338379511269703, + "learning_rate": 3.686076215533198e-05, + "loss": 0.2936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1492290496826172, + "step": 1765, + "valid_targets_mean": 3689.0, + "valid_targets_min": 2256 + }, + { + "epoch": 1.846113719353156, + "grad_norm": 0.6394213637775186, + "learning_rate": 3.683273436060275e-05, + "loss": 0.2974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1294146180152893, + "step": 1770, + "valid_targets_mean": 4224.8, + "valid_targets_min": 1919 + }, + { + "epoch": 1.8513302034428794, + "grad_norm": 0.6006524929365775, + "learning_rate": 3.680459275689964e-05, + "loss": 0.2996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1403236985206604, + "step": 1775, + "valid_targets_mean": 3339.4, + "valid_targets_min": 2763 + }, + { + "epoch": 1.856546687532603, + "grad_norm": 0.654348563170971, + "learning_rate": 3.677633753449278e-05, + "loss": 0.2967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14017406105995178, + "step": 1780, + "valid_targets_mean": 2628.4, + "valid_targets_min": 617 + }, + { + "epoch": 1.8617631716223264, + "grad_norm": 1.2420790288426253, + "learning_rate": 3.674796888442056e-05, + "loss": 0.2742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1438930481672287, + "step": 1785, + "valid_targets_mean": 4144.9, + "valid_targets_min": 3147 + }, + { + "epoch": 1.8669796557120502, + "grad_norm": 0.5937853335509925, + "learning_rate": 3.671948699848822e-05, + "loss": 0.2788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13330118358135223, + "step": 1790, + "valid_targets_mean": 3559.9, + "valid_targets_min": 1844 + }, + { + "epoch": 1.8721961398017735, + "grad_norm": 0.5664465173167643, + "learning_rate": 3.6690892069266624e-05, + "loss": 0.2879, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13768121600151062, + "step": 1795, + "valid_targets_mean": 4051.6, + "valid_targets_min": 2330 + }, + { + "epoch": 1.8774126238914972, + "grad_norm": 0.7093101065101584, + "learning_rate": 3.666218429009094e-05, + "loss": 0.295, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17705996334552765, + "step": 1800, + "valid_targets_mean": 3475.0, + "valid_targets_min": 1066 + }, + { + "epoch": 1.8826291079812205, + "grad_norm": 0.5770402866817037, + "learning_rate": 3.6633363855059336e-05, + "loss": 0.2817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14187762141227722, + "step": 1805, + "valid_targets_mean": 4077.0, + "valid_targets_min": 1713 + }, + { + "epoch": 1.8878455920709443, + "grad_norm": 0.6464808335655965, + "learning_rate": 3.6604430959031676e-05, + "loss": 0.2977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16321887075901031, + "step": 1810, + "valid_targets_mean": 3838.5, + "valid_targets_min": 981 + }, + { + "epoch": 1.8930620761606676, + "grad_norm": 0.602887295039398, + "learning_rate": 3.6575385797628166e-05, + "loss": 0.2853, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1670791357755661, + "step": 1815, + "valid_targets_mean": 4425.5, + "valid_targets_min": 3325 + }, + { + "epoch": 1.8982785602503913, + "grad_norm": 1.1404328086734137, + "learning_rate": 3.654622856722808e-05, + "loss": 0.2916, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14890897274017334, + "step": 1820, + "valid_targets_mean": 3307.1, + "valid_targets_min": 1525 + }, + { + "epoch": 1.9034950443401146, + "grad_norm": 0.5514334793032353, + "learning_rate": 3.651695946496839e-05, + "loss": 0.2847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13171306252479553, + "step": 1825, + "valid_targets_mean": 3937.9, + "valid_targets_min": 2273 + }, + { + "epoch": 1.9087115284298384, + "grad_norm": 0.6028999221766216, + "learning_rate": 3.6487578688742485e-05, + "loss": 0.2777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12609978020191193, + "step": 1830, + "valid_targets_mean": 3462.4, + "valid_targets_min": 2106 + }, + { + "epoch": 1.9139280125195617, + "grad_norm": 0.566329695929919, + "learning_rate": 3.6458086437198764e-05, + "loss": 0.2692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12038545310497284, + "step": 1835, + "valid_targets_mean": 3902.1, + "valid_targets_min": 1687 + }, + { + "epoch": 1.9191444966092854, + "grad_norm": 0.708491618724878, + "learning_rate": 3.642848290973934e-05, + "loss": 0.279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14792010188102722, + "step": 1840, + "valid_targets_mean": 3604.1, + "valid_targets_min": 2246 + }, + { + "epoch": 1.9243609806990087, + "grad_norm": 0.6111723971739514, + "learning_rate": 3.6398768306518706e-05, + "loss": 0.2823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12399682402610779, + "step": 1845, + "valid_targets_mean": 3352.8, + "valid_targets_min": 906 + }, + { + "epoch": 1.9295774647887325, + "grad_norm": 0.7728567271078919, + "learning_rate": 3.636894282844233e-05, + "loss": 0.2738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16505613923072815, + "step": 1850, + "valid_targets_mean": 4221.8, + "valid_targets_min": 1874 + }, + { + "epoch": 1.9347939488784558, + "grad_norm": 0.597795536782726, + "learning_rate": 3.6339006677165316e-05, + "loss": 0.2737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15368643403053284, + "step": 1855, + "valid_targets_mean": 4002.5, + "valid_targets_min": 2145 + }, + { + "epoch": 1.9400104329681795, + "grad_norm": 0.5969464471220197, + "learning_rate": 3.630896005509108e-05, + "loss": 0.2783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13363295793533325, + "step": 1860, + "valid_targets_mean": 3473.2, + "valid_targets_min": 1697 + }, + { + "epoch": 1.9452269170579028, + "grad_norm": 0.6816327791462166, + "learning_rate": 3.6278803165369926e-05, + "loss": 0.279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1429697722196579, + "step": 1865, + "valid_targets_mean": 2876.6, + "valid_targets_min": 897 + }, + { + "epoch": 1.9504434011476266, + "grad_norm": 0.6387303590488981, + "learning_rate": 3.6248536211897715e-05, + "loss": 0.2992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1371040940284729, + "step": 1870, + "valid_targets_mean": 3168.4, + "valid_targets_min": 1695 + }, + { + "epoch": 1.95565988523735, + "grad_norm": 0.6626852541161347, + "learning_rate": 3.621815939931444e-05, + "loss": 0.288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13776037096977234, + "step": 1875, + "valid_targets_mean": 3473.4, + "valid_targets_min": 1712 + }, + { + "epoch": 1.9608763693270737, + "grad_norm": 0.6160863443600232, + "learning_rate": 3.6187672933002904e-05, + "loss": 0.2826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1140759214758873, + "step": 1880, + "valid_targets_mean": 2963.9, + "valid_targets_min": 993 + }, + { + "epoch": 1.966092853416797, + "grad_norm": 0.5915380424385445, + "learning_rate": 3.6157077019087254e-05, + "loss": 0.2729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14681151509284973, + "step": 1885, + "valid_targets_mean": 4306.5, + "valid_targets_min": 2027 + }, + { + "epoch": 1.9713093375065207, + "grad_norm": 0.5740372276660152, + "learning_rate": 3.612637186443169e-05, + "loss": 0.2806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1387580931186676, + "step": 1890, + "valid_targets_mean": 4049.0, + "valid_targets_min": 2250 + }, + { + "epoch": 1.976525821596244, + "grad_norm": 0.5556376374149623, + "learning_rate": 3.609555767663895e-05, + "loss": 0.2866, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1416279375553131, + "step": 1895, + "valid_targets_mean": 4258.1, + "valid_targets_min": 2892 + }, + { + "epoch": 1.9817423056859678, + "grad_norm": 0.5654321243722964, + "learning_rate": 3.6064634664048996e-05, + "loss": 0.2915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15565285086631775, + "step": 1900, + "valid_targets_mean": 4290.0, + "valid_targets_min": 2965 + }, + { + "epoch": 1.986958789775691, + "grad_norm": 1.2640244720186264, + "learning_rate": 3.603360303573757e-05, + "loss": 0.2812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11590300500392914, + "step": 1905, + "valid_targets_mean": 3887.6, + "valid_targets_min": 1817 + }, + { + "epoch": 1.9921752738654148, + "grad_norm": 0.5805116701514224, + "learning_rate": 3.600246300151476e-05, + "loss": 0.2852, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16019999980926514, + "step": 1910, + "valid_targets_mean": 4666.2, + "valid_targets_min": 1727 + }, + { + "epoch": 1.9973917579551381, + "grad_norm": 0.6466012837388088, + "learning_rate": 3.597121477192364e-05, + "loss": 0.2816, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12988632917404175, + "step": 1915, + "valid_targets_mean": 3806.2, + "valid_targets_min": 1959 + }, + { + "epoch": 2.0020865936358896, + "grad_norm": 0.4867366190174163, + "learning_rate": 3.593985855823878e-05, + "loss": 0.2497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09875117242336273, + "step": 1920, + "valid_targets_mean": 5904.0, + "valid_targets_min": 4967 + }, + { + "epoch": 2.007303077725613, + "grad_norm": 0.4112533046508729, + "learning_rate": 3.590839457246487e-05, + "loss": 0.1965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1143631711602211, + "step": 1925, + "valid_targets_mean": 8641.4, + "valid_targets_min": 5164 + }, + { + "epoch": 2.0125195618153366, + "grad_norm": 0.410267178567368, + "learning_rate": 3.587682302733527e-05, + "loss": 0.1901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09275348484516144, + "step": 1930, + "valid_targets_mean": 6838.2, + "valid_targets_min": 5200 + }, + { + "epoch": 2.01773604590506, + "grad_norm": 0.4465527341598931, + "learning_rate": 3.584514413631054e-05, + "loss": 0.1887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07828178256750107, + "step": 1935, + "valid_targets_mean": 5411.6, + "valid_targets_min": 3853 + }, + { + "epoch": 2.0229525299947837, + "grad_norm": 0.44865366783474014, + "learning_rate": 3.581335811357703e-05, + "loss": 0.1948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10374307632446289, + "step": 1940, + "valid_targets_mean": 5598.0, + "valid_targets_min": 4669 + }, + { + "epoch": 2.028169014084507, + "grad_norm": 0.4407626916178103, + "learning_rate": 3.578146517404546e-05, + "loss": 0.2044, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10188546776771545, + "step": 1945, + "valid_targets_mean": 5953.0, + "valid_targets_min": 4216 + }, + { + "epoch": 2.0333854981742308, + "grad_norm": 0.43505782561162226, + "learning_rate": 3.574946553334938e-05, + "loss": 0.1978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10234092175960541, + "step": 1950, + "valid_targets_mean": 5914.0, + "valid_targets_min": 4589 + }, + { + "epoch": 2.038601982263954, + "grad_norm": 0.4332568891661802, + "learning_rate": 3.571735940784381e-05, + "loss": 0.1879, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09660466015338898, + "step": 1955, + "valid_targets_mean": 6515.4, + "valid_targets_min": 4588 + }, + { + "epoch": 2.043818466353678, + "grad_norm": 0.40762413447035345, + "learning_rate": 3.5685147014603705e-05, + "loss": 0.2214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0903250128030777, + "step": 1960, + "valid_targets_mean": 7015.2, + "valid_targets_min": 4530 + }, + { + "epoch": 2.049034950443401, + "grad_norm": 0.4232390564554361, + "learning_rate": 3.5652828571422536e-05, + "loss": 0.1922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10480391979217529, + "step": 1965, + "valid_targets_mean": 6202.6, + "valid_targets_min": 4837 + }, + { + "epoch": 2.054251434533125, + "grad_norm": 0.5432117018993393, + "learning_rate": 3.5620404296810766e-05, + "loss": 0.1949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06316159665584564, + "step": 1970, + "valid_targets_mean": 1720.9, + "valid_targets_min": 229 + }, + { + "epoch": 2.059467918622848, + "grad_norm": 0.4486086884748405, + "learning_rate": 3.558787440999442e-05, + "loss": 0.1885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09412388503551483, + "step": 1975, + "valid_targets_mean": 5963.8, + "valid_targets_min": 4216 + }, + { + "epoch": 2.064684402712572, + "grad_norm": 0.39695723227568214, + "learning_rate": 3.55552391309136e-05, + "loss": 0.1972, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09189904481172562, + "step": 1980, + "valid_targets_mean": 5973.9, + "valid_targets_min": 4835 + }, + { + "epoch": 2.0699008868022952, + "grad_norm": 0.3850161709598132, + "learning_rate": 3.5522498680220954e-05, + "loss": 0.1807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08257880806922913, + "step": 1985, + "valid_targets_mean": 6669.8, + "valid_targets_min": 4995 + }, + { + "epoch": 2.075117370892019, + "grad_norm": 0.5110983989936284, + "learning_rate": 3.5489653279280225e-05, + "loss": 0.1781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13060474395751953, + "step": 1990, + "valid_targets_mean": 5705.5, + "valid_targets_min": 4108 + }, + { + "epoch": 2.0803338549817423, + "grad_norm": 0.4909042342925742, + "learning_rate": 3.545670315016475e-05, + "loss": 0.1869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11902952194213867, + "step": 1995, + "valid_targets_mean": 6594.0, + "valid_targets_min": 4527 + }, + { + "epoch": 2.085550339071466, + "grad_norm": 0.5044620439498795, + "learning_rate": 3.5423648515655934e-05, + "loss": 0.1964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1048242598772049, + "step": 2000, + "valid_targets_mean": 3100.9, + "valid_targets_min": 2104 + }, + { + "epoch": 2.0907668231611893, + "grad_norm": 0.4163965620413746, + "learning_rate": 3.539048959924178e-05, + "loss": 0.176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08262741565704346, + "step": 2005, + "valid_targets_mean": 5817.8, + "valid_targets_min": 4757 + }, + { + "epoch": 2.095983307250913, + "grad_norm": 0.4352315984383143, + "learning_rate": 3.535722662511535e-05, + "loss": 0.1748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11256035417318344, + "step": 2010, + "valid_targets_mean": 6333.0, + "valid_targets_min": 5056 + }, + { + "epoch": 2.1011997913406364, + "grad_norm": 0.43066759830942797, + "learning_rate": 3.532385981817326e-05, + "loss": 0.177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0854366347193718, + "step": 2015, + "valid_targets_mean": 5956.0, + "valid_targets_min": 2834 + }, + { + "epoch": 2.10641627543036, + "grad_norm": 0.43609618583920684, + "learning_rate": 3.5290389404014136e-05, + "loss": 0.208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12734225392341614, + "step": 2020, + "valid_targets_mean": 6984.6, + "valid_targets_min": 5744 + }, + { + "epoch": 2.1116327595200834, + "grad_norm": 0.38112849448172265, + "learning_rate": 3.5256815608937155e-05, + "loss": 0.1885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08727939426898956, + "step": 2025, + "valid_targets_mean": 7290.2, + "valid_targets_min": 5640 + }, + { + "epoch": 2.116849243609807, + "grad_norm": 0.4122395928315619, + "learning_rate": 3.522313865994043e-05, + "loss": 0.1836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08934570848941803, + "step": 2030, + "valid_targets_mean": 7036.0, + "valid_targets_min": 5210 + }, + { + "epoch": 2.1220657276995305, + "grad_norm": 0.4284090874286304, + "learning_rate": 3.518935878471952e-05, + "loss": 0.1968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12471405416727066, + "step": 2035, + "valid_targets_mean": 7138.5, + "valid_targets_min": 5292 + }, + { + "epoch": 2.127282211789254, + "grad_norm": 0.4271528238025263, + "learning_rate": 3.515547621166591e-05, + "loss": 0.2069, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10126630961894989, + "step": 2040, + "valid_targets_mean": 6134.2, + "valid_targets_min": 3531 + }, + { + "epoch": 2.1324986958789776, + "grad_norm": 0.43290360431683494, + "learning_rate": 3.5121491169865425e-05, + "loss": 0.2229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10257638990879059, + "step": 2045, + "valid_targets_mean": 5968.2, + "valid_targets_min": 4640 + }, + { + "epoch": 2.1377151799687013, + "grad_norm": 0.4524020751596252, + "learning_rate": 3.508740388909669e-05, + "loss": 0.2055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09742672741413116, + "step": 2050, + "valid_targets_mean": 6249.6, + "valid_targets_min": 4328 + }, + { + "epoch": 2.1429316640584246, + "grad_norm": 0.4560001348381205, + "learning_rate": 3.505321459982961e-05, + "loss": 0.195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10615938901901245, + "step": 2055, + "valid_targets_mean": 6002.4, + "valid_targets_min": 3123 + }, + { + "epoch": 2.148148148148148, + "grad_norm": 0.5319407406930687, + "learning_rate": 3.501892353322376e-05, + "loss": 0.192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09678539633750916, + "step": 2060, + "valid_targets_mean": 6372.2, + "valid_targets_min": 4256 + }, + { + "epoch": 2.1533646322378717, + "grad_norm": 0.47950961533451103, + "learning_rate": 3.498453092112687e-05, + "loss": 0.1982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10630330443382263, + "step": 2065, + "valid_targets_mean": 5927.8, + "valid_targets_min": 4753 + }, + { + "epoch": 2.1585811163275954, + "grad_norm": 0.42080270478566445, + "learning_rate": 3.495003699607322e-05, + "loss": 0.1931, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09679624438285828, + "step": 2070, + "valid_targets_mean": 6106.1, + "valid_targets_min": 5370 + }, + { + "epoch": 2.1637976004173187, + "grad_norm": 0.46034712618261703, + "learning_rate": 3.49154419912821e-05, + "loss": 0.1977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10798361897468567, + "step": 2075, + "valid_targets_mean": 6240.6, + "valid_targets_min": 4608 + }, + { + "epoch": 2.169014084507042, + "grad_norm": 0.4352247242476144, + "learning_rate": 3.48807461406562e-05, + "loss": 0.197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0918012484908104, + "step": 2080, + "valid_targets_mean": 5733.2, + "valid_targets_min": 3801 + }, + { + "epoch": 2.174230568596766, + "grad_norm": 0.4407232678467677, + "learning_rate": 3.484594967878007e-05, + "loss": 0.195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1037389487028122, + "step": 2085, + "valid_targets_mean": 6430.1, + "valid_targets_min": 4754 + }, + { + "epoch": 2.1794470526864895, + "grad_norm": 0.34960627304706876, + "learning_rate": 3.481105284091847e-05, + "loss": 0.1696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07505771517753601, + "step": 2090, + "valid_targets_mean": 7375.2, + "valid_targets_min": 6417 + }, + { + "epoch": 2.184663536776213, + "grad_norm": 0.45785513143113415, + "learning_rate": 3.4776055863014864e-05, + "loss": 0.1844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10685893893241882, + "step": 2095, + "valid_targets_mean": 6281.0, + "valid_targets_min": 3468 + }, + { + "epoch": 2.189880020865936, + "grad_norm": 0.4232456380667937, + "learning_rate": 3.474095898168975e-05, + "loss": 0.1962, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09517915546894073, + "step": 2100, + "valid_targets_mean": 6532.4, + "valid_targets_min": 5221 + }, + { + "epoch": 2.19509650495566, + "grad_norm": 0.44610436305376705, + "learning_rate": 3.470576243423911e-05, + "loss": 0.1967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08073539286851883, + "step": 2105, + "valid_targets_mean": 5552.6, + "valid_targets_min": 4438 + }, + { + "epoch": 2.2003129890453836, + "grad_norm": 0.4513662405315135, + "learning_rate": 3.467046645863276e-05, + "loss": 0.1908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10223500430583954, + "step": 2110, + "valid_targets_mean": 5878.1, + "valid_targets_min": 3915 + }, + { + "epoch": 2.205529473135107, + "grad_norm": 0.3814994079893458, + "learning_rate": 3.463507129351279e-05, + "loss": 0.1824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07614268362522125, + "step": 2115, + "valid_targets_mean": 7082.6, + "valid_targets_min": 4135 + }, + { + "epoch": 2.2107459572248302, + "grad_norm": 0.43483633458622817, + "learning_rate": 3.459957717819191e-05, + "loss": 0.1859, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0981886088848114, + "step": 2120, + "valid_targets_mean": 5936.4, + "valid_targets_min": 3947 + }, + { + "epoch": 2.215962441314554, + "grad_norm": 0.4488628365716425, + "learning_rate": 3.4563984352651874e-05, + "loss": 0.1903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12031988799571991, + "step": 2125, + "valid_targets_mean": 6836.2, + "valid_targets_min": 4865 + }, + { + "epoch": 2.2211789254042777, + "grad_norm": 0.4294359728185862, + "learning_rate": 3.45282930575418e-05, + "loss": 0.1905, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09401045739650726, + "step": 2130, + "valid_targets_mean": 6539.4, + "valid_targets_min": 4643 + }, + { + "epoch": 2.226395409494001, + "grad_norm": 0.4331674915522541, + "learning_rate": 3.449250353417661e-05, + "loss": 0.1817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09641602635383606, + "step": 2135, + "valid_targets_mean": 6613.6, + "valid_targets_min": 4959 + }, + { + "epoch": 2.2316118935837244, + "grad_norm": 0.41983273021022455, + "learning_rate": 3.445661602453533e-05, + "loss": 0.2061, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10822772979736328, + "step": 2140, + "valid_targets_mean": 6044.1, + "valid_targets_min": 4652 + }, + { + "epoch": 2.236828377673448, + "grad_norm": 0.5115433021910075, + "learning_rate": 3.44206307712595e-05, + "loss": 0.169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08335600048303604, + "step": 2145, + "valid_targets_mean": 5893.9, + "valid_targets_min": 4938 + }, + { + "epoch": 2.2420448617631714, + "grad_norm": 0.38354248291557275, + "learning_rate": 3.4384548017651544e-05, + "loss": 0.1633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07646828889846802, + "step": 2150, + "valid_targets_mean": 5849.6, + "valid_targets_min": 4546 + }, + { + "epoch": 2.247261345852895, + "grad_norm": 0.43544279425825827, + "learning_rate": 3.4348368007673065e-05, + "loss": 0.1689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0876227542757988, + "step": 2155, + "valid_targets_mean": 5751.5, + "valid_targets_min": 3200 + }, + { + "epoch": 2.2524778299426185, + "grad_norm": 0.40177712905944063, + "learning_rate": 3.4312090985943266e-05, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09557884931564331, + "step": 2160, + "valid_targets_mean": 6149.6, + "valid_targets_min": 5010 + }, + { + "epoch": 2.257694314032342, + "grad_norm": 0.42220966188869713, + "learning_rate": 3.4275717197737234e-05, + "loss": 0.1936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1044800877571106, + "step": 2165, + "valid_targets_mean": 6551.2, + "valid_targets_min": 4904 + }, + { + "epoch": 2.262910798122066, + "grad_norm": 0.4968418338300447, + "learning_rate": 3.423924688898433e-05, + "loss": 0.2214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13534599542617798, + "step": 2170, + "valid_targets_mean": 5728.9, + "valid_targets_min": 4812 + }, + { + "epoch": 2.2681272822117893, + "grad_norm": 0.45935513807093, + "learning_rate": 3.420268030626651e-05, + "loss": 0.1947, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08005398511886597, + "step": 2175, + "valid_targets_mean": 5047.9, + "valid_targets_min": 3629 + }, + { + "epoch": 2.2733437663015126, + "grad_norm": 0.5256427898553453, + "learning_rate": 3.416601769681663e-05, + "loss": 0.1841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0951690599322319, + "step": 2180, + "valid_targets_mean": 6260.0, + "valid_targets_min": 4324 + }, + { + "epoch": 2.2785602503912363, + "grad_norm": 0.36723876012905277, + "learning_rate": 3.412925930851683e-05, + "loss": 0.186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08248650282621384, + "step": 2185, + "valid_targets_mean": 7853.1, + "valid_targets_min": 4920 + }, + { + "epoch": 2.28377673448096, + "grad_norm": 0.41022135249002134, + "learning_rate": 3.40924053898968e-05, + "loss": 0.1991, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08977551758289337, + "step": 2190, + "valid_targets_mean": 6231.0, + "valid_targets_min": 4882 + }, + { + "epoch": 2.2889932185706834, + "grad_norm": 0.3885802545469293, + "learning_rate": 3.405545619013214e-05, + "loss": 0.2015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09008309245109558, + "step": 2195, + "valid_targets_mean": 7472.9, + "valid_targets_min": 3943 + }, + { + "epoch": 2.2942097026604067, + "grad_norm": 0.4141660733403469, + "learning_rate": 3.401841195904267e-05, + "loss": 0.1908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0991562157869339, + "step": 2200, + "valid_targets_mean": 6840.2, + "valid_targets_min": 5034 + }, + { + "epoch": 2.2994261867501304, + "grad_norm": 0.4446954333219515, + "learning_rate": 3.398127294709072e-05, + "loss": 0.1907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09946858882904053, + "step": 2205, + "valid_targets_mean": 6458.5, + "valid_targets_min": 4461 + }, + { + "epoch": 2.3046426708398537, + "grad_norm": 0.4085542167515388, + "learning_rate": 3.3944039405379444e-05, + "loss": 0.1795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08701901137828827, + "step": 2210, + "valid_targets_mean": 6645.4, + "valid_targets_min": 5188 + }, + { + "epoch": 2.3098591549295775, + "grad_norm": 0.41325223266803557, + "learning_rate": 3.390671158565115e-05, + "loss": 0.178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10056047886610031, + "step": 2215, + "valid_targets_mean": 6002.9, + "valid_targets_min": 5009 + }, + { + "epoch": 2.315075639019301, + "grad_norm": 0.45155093023180914, + "learning_rate": 3.386928974028555e-05, + "loss": 0.2355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10202871263027191, + "step": 2220, + "valid_targets_mean": 6025.9, + "valid_targets_min": 4551 + }, + { + "epoch": 2.3202921231090246, + "grad_norm": 0.37837258822031855, + "learning_rate": 3.3831774122298106e-05, + "loss": 0.1782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09777092933654785, + "step": 2225, + "valid_targets_mean": 7210.0, + "valid_targets_min": 4492 + }, + { + "epoch": 2.325508607198748, + "grad_norm": 0.42039385434134624, + "learning_rate": 3.3794164985338266e-05, + "loss": 0.1804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09167613834142685, + "step": 2230, + "valid_targets_mean": 7051.4, + "valid_targets_min": 4718 + }, + { + "epoch": 2.3307250912884716, + "grad_norm": 0.4071650276718329, + "learning_rate": 3.37564625836878e-05, + "loss": 0.1927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10083135962486267, + "step": 2235, + "valid_targets_mean": 6686.0, + "valid_targets_min": 4827 + }, + { + "epoch": 2.335941575378195, + "grad_norm": 0.39807715067303884, + "learning_rate": 3.3718667172259026e-05, + "loss": 0.1892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07684303820133209, + "step": 2240, + "valid_targets_mean": 5311.9, + "valid_targets_min": 3828 + }, + { + "epoch": 2.3411580594679187, + "grad_norm": 0.3919805667722109, + "learning_rate": 3.368077900659315e-05, + "loss": 0.1669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0803682804107666, + "step": 2245, + "valid_targets_mean": 7078.9, + "valid_targets_min": 4594 + }, + { + "epoch": 2.346374543557642, + "grad_norm": 0.4161057378209624, + "learning_rate": 3.364279834285848e-05, + "loss": 0.1837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07646815478801727, + "step": 2250, + "valid_targets_mean": 6393.8, + "valid_targets_min": 4509 + }, + { + "epoch": 2.3515910276473657, + "grad_norm": 0.3979884628751663, + "learning_rate": 3.360472543784875e-05, + "loss": 0.1711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10432074964046478, + "step": 2255, + "valid_targets_mean": 7848.9, + "valid_targets_min": 5649 + }, + { + "epoch": 2.356807511737089, + "grad_norm": 0.3888362912377051, + "learning_rate": 3.356656054898132e-05, + "loss": 0.1725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08038143813610077, + "step": 2260, + "valid_targets_mean": 6623.4, + "valid_targets_min": 4343 + }, + { + "epoch": 2.3620239958268128, + "grad_norm": 0.38734480976270497, + "learning_rate": 3.352830393429547e-05, + "loss": 0.1784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08363331854343414, + "step": 2265, + "valid_targets_mean": 6255.9, + "valid_targets_min": 4660 + }, + { + "epoch": 2.367240479916536, + "grad_norm": 0.36944107089402967, + "learning_rate": 3.3489955852450675e-05, + "loss": 0.1833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09112220257520676, + "step": 2270, + "valid_targets_mean": 8243.0, + "valid_targets_min": 6281 + }, + { + "epoch": 2.37245696400626, + "grad_norm": 0.3666035322733915, + "learning_rate": 3.3451516562724834e-05, + "loss": 0.1723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09047673642635345, + "step": 2275, + "valid_targets_mean": 7083.5, + "valid_targets_min": 4657 + }, + { + "epoch": 2.377673448095983, + "grad_norm": 0.43580436111237736, + "learning_rate": 3.341298632501249e-05, + "loss": 0.1859, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10701163113117218, + "step": 2280, + "valid_targets_mean": 6399.8, + "valid_targets_min": 5098 + }, + { + "epoch": 2.382889932185707, + "grad_norm": 0.45804704453093464, + "learning_rate": 3.3374365399823134e-05, + "loss": 0.1889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0997365266084671, + "step": 2285, + "valid_targets_mean": 5257.4, + "valid_targets_min": 1879 + }, + { + "epoch": 2.38810641627543, + "grad_norm": 0.522960552515654, + "learning_rate": 3.3335654048279395e-05, + "loss": 0.196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11080147325992584, + "step": 2290, + "valid_targets_mean": 5260.5, + "valid_targets_min": 3619 + }, + { + "epoch": 2.393322900365154, + "grad_norm": 0.5757528785495604, + "learning_rate": 3.329685253211528e-05, + "loss": 0.2731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1598043441772461, + "step": 2295, + "valid_targets_mean": 4215.5, + "valid_targets_min": 1142 + }, + { + "epoch": 2.3985393844548772, + "grad_norm": 0.5218837352611313, + "learning_rate": 3.325796111367444e-05, + "loss": 0.2746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12783518433570862, + "step": 2300, + "valid_targets_mean": 4934.0, + "valid_targets_min": 1799 + }, + { + "epoch": 2.403755868544601, + "grad_norm": 0.5257138278553062, + "learning_rate": 3.321898005590835e-05, + "loss": 0.2673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13994050025939941, + "step": 2305, + "valid_targets_mean": 5817.9, + "valid_targets_min": 3015 + }, + { + "epoch": 2.4089723526343243, + "grad_norm": 0.5008699070837472, + "learning_rate": 3.317990962237454e-05, + "loss": 0.2669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13536694645881653, + "step": 2310, + "valid_targets_mean": 6296.1, + "valid_targets_min": 1681 + }, + { + "epoch": 2.414188836724048, + "grad_norm": 0.6013798149501925, + "learning_rate": 3.314075007723487e-05, + "loss": 0.275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12750005722045898, + "step": 2315, + "valid_targets_mean": 3804.1, + "valid_targets_min": 1374 + }, + { + "epoch": 2.4194053208137714, + "grad_norm": 0.5751000882938346, + "learning_rate": 3.3101501685253657e-05, + "loss": 0.2786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13656474649906158, + "step": 2320, + "valid_targets_mean": 3971.6, + "valid_targets_min": 1951 + }, + { + "epoch": 2.424621804903495, + "grad_norm": 0.5439576591666411, + "learning_rate": 3.306216471179594e-05, + "loss": 0.2668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11575556546449661, + "step": 2325, + "valid_targets_mean": 5107.8, + "valid_targets_min": 2042 + }, + { + "epoch": 2.4298382889932184, + "grad_norm": 0.48825541347389834, + "learning_rate": 3.3022739422825686e-05, + "loss": 0.2674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11280100047588348, + "step": 2330, + "valid_targets_mean": 5041.9, + "valid_targets_min": 1831 + }, + { + "epoch": 2.435054773082942, + "grad_norm": 0.5553315577093292, + "learning_rate": 3.2983226084903944e-05, + "loss": 0.2798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16123196482658386, + "step": 2335, + "valid_targets_mean": 4604.1, + "valid_targets_min": 2077 + }, + { + "epoch": 2.4402712571726655, + "grad_norm": 0.5002160675244002, + "learning_rate": 3.294362496518711e-05, + "loss": 0.2717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12175354361534119, + "step": 2340, + "valid_targets_mean": 4612.0, + "valid_targets_min": 2022 + }, + { + "epoch": 2.445487741262389, + "grad_norm": 0.5154528185838769, + "learning_rate": 3.290393633142507e-05, + "loss": 0.278, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1598762422800064, + "step": 2345, + "valid_targets_mean": 5350.1, + "valid_targets_min": 1499 + }, + { + "epoch": 2.4507042253521125, + "grad_norm": 0.4851007958777953, + "learning_rate": 3.286416045195943e-05, + "loss": 0.2635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12968409061431885, + "step": 2350, + "valid_targets_mean": 4540.5, + "valid_targets_min": 1013 + }, + { + "epoch": 2.4559207094418363, + "grad_norm": 0.6098854207926244, + "learning_rate": 3.282429759572164e-05, + "loss": 0.2752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1321035623550415, + "step": 2355, + "valid_targets_mean": 4368.1, + "valid_targets_min": 1939 + }, + { + "epoch": 2.4611371935315596, + "grad_norm": 0.6155614604740887, + "learning_rate": 3.2784348032231245e-05, + "loss": 0.2656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1557837575674057, + "step": 2360, + "valid_targets_mean": 3930.9, + "valid_targets_min": 1820 + }, + { + "epoch": 2.4663536776212833, + "grad_norm": 0.628446654269792, + "learning_rate": 3.274431203159402e-05, + "loss": 0.2798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14925658702850342, + "step": 2365, + "valid_targets_mean": 3865.8, + "valid_targets_min": 1589 + }, + { + "epoch": 2.4715701617110066, + "grad_norm": 0.5749531439191033, + "learning_rate": 3.270418986450017e-05, + "loss": 0.2701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13470016419887543, + "step": 2370, + "valid_targets_mean": 4365.5, + "valid_targets_min": 2668 + }, + { + "epoch": 2.4767866458007304, + "grad_norm": 0.6112944450008992, + "learning_rate": 3.266398180222247e-05, + "loss": 0.2704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14018294215202332, + "step": 2375, + "valid_targets_mean": 3969.9, + "valid_targets_min": 1318 + }, + { + "epoch": 2.4820031298904537, + "grad_norm": 0.758943812757189, + "learning_rate": 3.262368811661446e-05, + "loss": 0.2869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13734371960163116, + "step": 2380, + "valid_targets_mean": 3546.0, + "valid_targets_min": 2058 + }, + { + "epoch": 2.4872196139801774, + "grad_norm": 0.6129201166518291, + "learning_rate": 3.2583309080108576e-05, + "loss": 0.2807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11807530373334885, + "step": 2385, + "valid_targets_mean": 2989.2, + "valid_targets_min": 1128 + }, + { + "epoch": 2.4924360980699007, + "grad_norm": 0.601483756249485, + "learning_rate": 3.254284496571434e-05, + "loss": 0.2684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14153246581554413, + "step": 2390, + "valid_targets_mean": 3647.8, + "valid_targets_min": 2001 + }, + { + "epoch": 2.4976525821596245, + "grad_norm": 0.5885882968338928, + "learning_rate": 3.25022960470165e-05, + "loss": 0.2765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13896076381206512, + "step": 2395, + "valid_targets_mean": 4171.5, + "valid_targets_min": 1267 + }, + { + "epoch": 2.502869066249348, + "grad_norm": 0.543453585812956, + "learning_rate": 3.246166259817318e-05, + "loss": 0.265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13512207567691803, + "step": 2400, + "valid_targets_mean": 4544.0, + "valid_targets_min": 3111 + }, + { + "epoch": 2.5080855503390715, + "grad_norm": 0.6860074350404666, + "learning_rate": 3.242094489391402e-05, + "loss": 0.2547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1351841539144516, + "step": 2405, + "valid_targets_mean": 2999.6, + "valid_targets_min": 1403 + }, + { + "epoch": 2.513302034428795, + "grad_norm": 0.6361770165463826, + "learning_rate": 3.238014320953832e-05, + "loss": 0.27, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1668992042541504, + "step": 2410, + "valid_targets_mean": 4133.2, + "valid_targets_min": 1588 + }, + { + "epoch": 2.5185185185185186, + "grad_norm": 0.6973211945209596, + "learning_rate": 3.233925782091322e-05, + "loss": 0.2744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13658380508422852, + "step": 2415, + "valid_targets_mean": 3115.0, + "valid_targets_min": 1363 + }, + { + "epoch": 2.523735002608242, + "grad_norm": 0.5333138372686862, + "learning_rate": 3.229828900447174e-05, + "loss": 0.2649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10996310412883759, + "step": 2420, + "valid_targets_mean": 4001.2, + "valid_targets_min": 1631 + }, + { + "epoch": 2.5289514866979657, + "grad_norm": 0.563712230363288, + "learning_rate": 3.2257237037211026e-05, + "loss": 0.2677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16854894161224365, + "step": 2425, + "valid_targets_mean": 5059.0, + "valid_targets_min": 3090 + }, + { + "epoch": 2.534167970787689, + "grad_norm": 0.6178248005121174, + "learning_rate": 3.221610219669038e-05, + "loss": 0.2589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16474637389183044, + "step": 2430, + "valid_targets_mean": 3915.0, + "valid_targets_min": 1802 + }, + { + "epoch": 2.5393844548774127, + "grad_norm": 0.649533691332809, + "learning_rate": 3.2174884761029456e-05, + "loss": 0.2716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16140121221542358, + "step": 2435, + "valid_targets_mean": 3672.2, + "valid_targets_min": 1685 + }, + { + "epoch": 2.544600938967136, + "grad_norm": 0.5531517313330834, + "learning_rate": 3.2133585008906307e-05, + "loss": 0.2576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12701262533664703, + "step": 2440, + "valid_targets_mean": 4007.2, + "valid_targets_min": 2292 + }, + { + "epoch": 2.5498174230568598, + "grad_norm": 0.5750112160866443, + "learning_rate": 3.209220321955559e-05, + "loss": 0.2618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13344469666481018, + "step": 2445, + "valid_targets_mean": 4008.5, + "valid_targets_min": 2060 + }, + { + "epoch": 2.555033907146583, + "grad_norm": 0.6879304235082159, + "learning_rate": 3.205073967276659e-05, + "loss": 0.2649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1329180896282196, + "step": 2450, + "valid_targets_mean": 3193.8, + "valid_targets_min": 1458 + }, + { + "epoch": 2.560250391236307, + "grad_norm": 0.636213689614092, + "learning_rate": 3.20091946488814e-05, + "loss": 0.2669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1360282599925995, + "step": 2455, + "valid_targets_mean": 4109.6, + "valid_targets_min": 1512 + }, + { + "epoch": 2.56546687532603, + "grad_norm": 0.6879742145181917, + "learning_rate": 3.196756842879297e-05, + "loss": 0.2599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11506295949220657, + "step": 2460, + "valid_targets_mean": 2608.5, + "valid_targets_min": 1480 + }, + { + "epoch": 2.570683359415754, + "grad_norm": 0.6129634901248588, + "learning_rate": 3.1925861293943234e-05, + "loss": 0.273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1380959302186966, + "step": 2465, + "valid_targets_mean": 3886.5, + "valid_targets_min": 1229 + }, + { + "epoch": 2.575899843505477, + "grad_norm": 0.6437485341032593, + "learning_rate": 3.1884073526321216e-05, + "loss": 0.2713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12944695353507996, + "step": 2470, + "valid_targets_mean": 3347.1, + "valid_targets_min": 1276 + }, + { + "epoch": 2.581116327595201, + "grad_norm": 0.5891943527405692, + "learning_rate": 3.18422054084611e-05, + "loss": 0.2599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12563681602478027, + "step": 2475, + "valid_targets_mean": 3769.8, + "valid_targets_min": 1357 + }, + { + "epoch": 2.5863328116849242, + "grad_norm": 0.6690233677514072, + "learning_rate": 3.180025722344034e-05, + "loss": 0.2571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1363009363412857, + "step": 2480, + "valid_targets_mean": 3142.2, + "valid_targets_min": 924 + }, + { + "epoch": 2.591549295774648, + "grad_norm": 0.6086142401293023, + "learning_rate": 3.175822925487774e-05, + "loss": 0.2697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13353917002677917, + "step": 2485, + "valid_targets_mean": 4241.0, + "valid_targets_min": 2089 + }, + { + "epoch": 2.5967657798643713, + "grad_norm": 0.6205520399204519, + "learning_rate": 3.171612178693151e-05, + "loss": 0.2646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12711051106452942, + "step": 2490, + "valid_targets_mean": 3015.9, + "valid_targets_min": 1643 + }, + { + "epoch": 2.601982263954095, + "grad_norm": 0.6142982740974111, + "learning_rate": 3.1673935104297414e-05, + "loss": 0.2555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13295480608940125, + "step": 2495, + "valid_targets_mean": 4017.5, + "valid_targets_min": 1244 + }, + { + "epoch": 2.6071987480438183, + "grad_norm": 0.6629556042094129, + "learning_rate": 3.163166949220675e-05, + "loss": 0.2737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1332044005393982, + "step": 2500, + "valid_targets_mean": 4399.2, + "valid_targets_min": 730 + }, + { + "epoch": 2.612415232133542, + "grad_norm": 0.6602819951333359, + "learning_rate": 3.158932523642451e-05, + "loss": 0.2616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09878230839967728, + "step": 2505, + "valid_targets_mean": 2727.6, + "valid_targets_min": 977 + }, + { + "epoch": 2.6176317162232654, + "grad_norm": 0.5936619436667753, + "learning_rate": 3.1546902623247385e-05, + "loss": 0.2625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15224990248680115, + "step": 2510, + "valid_targets_mean": 4673.0, + "valid_targets_min": 2091 + }, + { + "epoch": 2.622848200312989, + "grad_norm": 0.6288898913259299, + "learning_rate": 3.1504401939501866e-05, + "loss": 0.2485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11980222165584564, + "step": 2515, + "valid_targets_mean": 3184.4, + "valid_targets_min": 1798 + }, + { + "epoch": 2.6280646844027125, + "grad_norm": 0.656499030179585, + "learning_rate": 3.146182347254228e-05, + "loss": 0.2708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12986360490322113, + "step": 2520, + "valid_targets_mean": 3983.6, + "valid_targets_min": 1432 + }, + { + "epoch": 2.633281168492436, + "grad_norm": 0.581794350195772, + "learning_rate": 3.141916751024889e-05, + "loss": 0.2528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12275084853172302, + "step": 2525, + "valid_targets_mean": 3666.4, + "valid_targets_min": 1261 + }, + { + "epoch": 2.6384976525821595, + "grad_norm": 0.6413467747069259, + "learning_rate": 3.137643434102588e-05, + "loss": 0.2706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16420862078666687, + "step": 2530, + "valid_targets_mean": 4252.8, + "valid_targets_min": 2737 + }, + { + "epoch": 2.6437141366718833, + "grad_norm": 0.6247923367477827, + "learning_rate": 3.1333624253799464e-05, + "loss": 0.261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11158216744661331, + "step": 2535, + "valid_targets_mean": 3150.9, + "valid_targets_min": 1219 + }, + { + "epoch": 2.6489306207616066, + "grad_norm": 0.6301650361759836, + "learning_rate": 3.1290737538015904e-05, + "loss": 0.2701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12007425725460052, + "step": 2540, + "valid_targets_mean": 3408.0, + "valid_targets_min": 2058 + }, + { + "epoch": 2.6541471048513303, + "grad_norm": 0.7514198249538541, + "learning_rate": 3.1247774483639575e-05, + "loss": 0.2554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11466147005558014, + "step": 2545, + "valid_targets_mean": 2731.5, + "valid_targets_min": 916 + }, + { + "epoch": 2.6593635889410536, + "grad_norm": 0.5963508036840182, + "learning_rate": 3.120473538115096e-05, + "loss": 0.2731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11840688437223434, + "step": 2550, + "valid_targets_mean": 3799.5, + "valid_targets_min": 1302 + }, + { + "epoch": 2.6645800730307774, + "grad_norm": 0.6668534121427236, + "learning_rate": 3.116162052154476e-05, + "loss": 0.2538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16341936588287354, + "step": 2555, + "valid_targets_mean": 3893.1, + "valid_targets_min": 1500 + }, + { + "epoch": 2.6697965571205007, + "grad_norm": 0.6295036473273128, + "learning_rate": 3.111843019632784e-05, + "loss": 0.2473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1511596441268921, + "step": 2560, + "valid_targets_mean": 4703.6, + "valid_targets_min": 2803 + }, + { + "epoch": 2.6750130412102244, + "grad_norm": 0.5742105781670456, + "learning_rate": 3.1075164697517326e-05, + "loss": 0.2415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12208431214094162, + "step": 2565, + "valid_targets_mean": 3884.5, + "valid_targets_min": 878 + }, + { + "epoch": 2.6802295252999477, + "grad_norm": 0.606637973146915, + "learning_rate": 3.10318243176386e-05, + "loss": 0.2699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13517910242080688, + "step": 2570, + "valid_targets_mean": 3935.0, + "valid_targets_min": 2662 + }, + { + "epoch": 2.6854460093896715, + "grad_norm": 0.5884950394733762, + "learning_rate": 3.0988409349723317e-05, + "loss": 0.2581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11802235245704651, + "step": 2575, + "valid_targets_mean": 3903.6, + "valid_targets_min": 1086 + }, + { + "epoch": 2.690662493479395, + "grad_norm": 0.5633164600160636, + "learning_rate": 3.094492008730746e-05, + "loss": 0.26, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13814407587051392, + "step": 2580, + "valid_targets_mean": 5394.1, + "valid_targets_min": 1566 + }, + { + "epoch": 2.6958789775691185, + "grad_norm": 0.5879534756141341, + "learning_rate": 3.09013568244293e-05, + "loss": 0.26, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15025027096271515, + "step": 2585, + "valid_targets_mean": 3978.9, + "valid_targets_min": 2504 + }, + { + "epoch": 2.701095461658842, + "grad_norm": 0.6563301285087387, + "learning_rate": 3.085771985562745e-05, + "loss": 0.2668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1593698114156723, + "step": 2590, + "valid_targets_mean": 4021.4, + "valid_targets_min": 1548 + }, + { + "epoch": 2.7063119457485656, + "grad_norm": 0.648145899736352, + "learning_rate": 3.081400947593887e-05, + "loss": 0.2611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11143846809864044, + "step": 2595, + "valid_targets_mean": 2818.6, + "valid_targets_min": 1352 + }, + { + "epoch": 2.711528429838289, + "grad_norm": 0.5610216066006339, + "learning_rate": 3.077022598089685e-05, + "loss": 0.2552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1364215612411499, + "step": 2600, + "valid_targets_mean": 5038.8, + "valid_targets_min": 2339 + }, + { + "epoch": 2.7167449139280127, + "grad_norm": 0.5888988632251233, + "learning_rate": 3.072636966652904e-05, + "loss": 0.2518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12690719962120056, + "step": 2605, + "valid_targets_mean": 4025.4, + "valid_targets_min": 2015 + }, + { + "epoch": 2.721961398017736, + "grad_norm": 0.57396727340931, + "learning_rate": 3.0682440829355416e-05, + "loss": 0.2514, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13772521913051605, + "step": 2610, + "valid_targets_mean": 4755.2, + "valid_targets_min": 1731 + }, + { + "epoch": 2.7271778821074597, + "grad_norm": 0.6696393083367386, + "learning_rate": 3.06384397663863e-05, + "loss": 0.2512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10406316071748734, + "step": 2615, + "valid_targets_mean": 2545.1, + "valid_targets_min": 1460 + }, + { + "epoch": 2.732394366197183, + "grad_norm": 0.6291440690585867, + "learning_rate": 3.059436677512035e-05, + "loss": 0.2448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13974426686763763, + "step": 2620, + "valid_targets_mean": 4214.9, + "valid_targets_min": 1282 + }, + { + "epoch": 2.7376108502869068, + "grad_norm": 0.6217373794138128, + "learning_rate": 3.055022215354254e-05, + "loss": 0.2417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11521928757429123, + "step": 2625, + "valid_targets_mean": 3325.1, + "valid_targets_min": 1421 + }, + { + "epoch": 2.74282733437663, + "grad_norm": 0.5971030771585415, + "learning_rate": 3.0506006200122154e-05, + "loss": 0.2448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13960281014442444, + "step": 2630, + "valid_targets_mean": 4091.0, + "valid_targets_min": 1764 + }, + { + "epoch": 2.748043818466354, + "grad_norm": 0.7129536496641111, + "learning_rate": 3.0461719213810756e-05, + "loss": 0.268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14494164288043976, + "step": 2635, + "valid_targets_mean": 2931.1, + "valid_targets_min": 1175 + }, + { + "epoch": 2.753260302556077, + "grad_norm": 0.5449647984572306, + "learning_rate": 3.0417361494040167e-05, + "loss": 0.2595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11925499141216278, + "step": 2640, + "valid_targets_mean": 4426.1, + "valid_targets_min": 2546 + }, + { + "epoch": 2.758476786645801, + "grad_norm": 0.6889943197664462, + "learning_rate": 3.037293334072047e-05, + "loss": 0.2562, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1619665026664734, + "step": 2645, + "valid_targets_mean": 4051.2, + "valid_targets_min": 1244 + }, + { + "epoch": 2.763693270735524, + "grad_norm": 0.5590770499677077, + "learning_rate": 3.0328435054237944e-05, + "loss": 0.2554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14281776547431946, + "step": 2650, + "valid_targets_mean": 4871.5, + "valid_targets_min": 3056 + }, + { + "epoch": 2.768909754825248, + "grad_norm": 0.579260504876943, + "learning_rate": 3.0283866935453057e-05, + "loss": 0.2513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12741178274154663, + "step": 2655, + "valid_targets_mean": 3943.5, + "valid_targets_min": 1948 + }, + { + "epoch": 2.7741262389149712, + "grad_norm": 0.5126136144210439, + "learning_rate": 3.023922928569843e-05, + "loss": 0.2473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1161690428853035, + "step": 2660, + "valid_targets_mean": 4304.1, + "valid_targets_min": 1558 + }, + { + "epoch": 2.779342723004695, + "grad_norm": 0.5154900381619515, + "learning_rate": 3.019452240677678e-05, + "loss": 0.2501, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10638076066970825, + "step": 2665, + "valid_targets_mean": 4116.9, + "valid_targets_min": 1145 + }, + { + "epoch": 2.7845592070944183, + "grad_norm": 0.6051304011759127, + "learning_rate": 3.0149746600958908e-05, + "loss": 0.2526, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1423228681087494, + "step": 2670, + "valid_targets_mean": 3832.9, + "valid_targets_min": 1428 + }, + { + "epoch": 2.789775691184142, + "grad_norm": 0.5933593014958175, + "learning_rate": 3.0104902170981633e-05, + "loss": 0.2516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15102079510688782, + "step": 2675, + "valid_targets_mean": 4549.4, + "valid_targets_min": 1708 + }, + { + "epoch": 2.7949921752738653, + "grad_norm": 0.671973221388065, + "learning_rate": 3.005998942004576e-05, + "loss": 0.2495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10356669127941132, + "step": 2680, + "valid_targets_mean": 3170.9, + "valid_targets_min": 1301 + }, + { + "epoch": 2.800208659363589, + "grad_norm": 0.6580979632037579, + "learning_rate": 3.0015008651814023e-05, + "loss": 0.2665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12527771294116974, + "step": 2685, + "valid_targets_mean": 3377.1, + "valid_targets_min": 1971 + }, + { + "epoch": 2.8054251434533124, + "grad_norm": 0.5710905469018628, + "learning_rate": 2.9969960170409033e-05, + "loss": 0.2654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14853718876838684, + "step": 2690, + "valid_targets_mean": 4398.6, + "valid_targets_min": 1054 + }, + { + "epoch": 2.810641627543036, + "grad_norm": 0.5948033869486571, + "learning_rate": 2.9924844280411208e-05, + "loss": 0.2601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14837758243083954, + "step": 2695, + "valid_targets_mean": 4018.1, + "valid_targets_min": 1612 + }, + { + "epoch": 2.8158581116327595, + "grad_norm": 0.6790879069804648, + "learning_rate": 2.987966128685674e-05, + "loss": 0.2692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10446201264858246, + "step": 2700, + "valid_targets_mean": 3404.1, + "valid_targets_min": 721 + }, + { + "epoch": 2.821074595722483, + "grad_norm": 0.5783174413231363, + "learning_rate": 2.9834411495235526e-05, + "loss": 0.2563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12726347148418427, + "step": 2705, + "valid_targets_mean": 4015.5, + "valid_targets_min": 1625 + }, + { + "epoch": 2.8262910798122065, + "grad_norm": 0.6159690706996529, + "learning_rate": 2.9789095211489082e-05, + "loss": 0.2678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1302340030670166, + "step": 2710, + "valid_targets_mean": 4242.8, + "valid_targets_min": 1381 + }, + { + "epoch": 2.8315075639019303, + "grad_norm": 0.6505143337012326, + "learning_rate": 2.9743712742008486e-05, + "loss": 0.2724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13252034783363342, + "step": 2715, + "valid_targets_mean": 3441.9, + "valid_targets_min": 998 + }, + { + "epoch": 2.8367240479916536, + "grad_norm": 0.6023200393336473, + "learning_rate": 2.9698264393632326e-05, + "loss": 0.2559, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10933835804462433, + "step": 2720, + "valid_targets_mean": 3983.0, + "valid_targets_min": 1186 + }, + { + "epoch": 2.8419405320813773, + "grad_norm": 0.7548174268471262, + "learning_rate": 2.9652750473644597e-05, + "loss": 0.2619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15246373414993286, + "step": 2725, + "valid_targets_mean": 3655.4, + "valid_targets_min": 1195 + }, + { + "epoch": 2.8471570161711006, + "grad_norm": 0.6465522519597299, + "learning_rate": 2.9607171289772635e-05, + "loss": 0.2627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12623417377471924, + "step": 2730, + "valid_targets_mean": 2551.0, + "valid_targets_min": 885 + }, + { + "epoch": 2.8523735002608244, + "grad_norm": 0.6388235229438765, + "learning_rate": 2.9561527150185035e-05, + "loss": 0.2645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1198853850364685, + "step": 2735, + "valid_targets_mean": 3153.6, + "valid_targets_min": 814 + }, + { + "epoch": 2.8575899843505477, + "grad_norm": 0.6170945295874617, + "learning_rate": 2.9515818363489582e-05, + "loss": 0.2608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11560390144586563, + "step": 2740, + "valid_targets_mean": 3468.2, + "valid_targets_min": 2174 + }, + { + "epoch": 2.8628064684402714, + "grad_norm": 0.5309890065820382, + "learning_rate": 2.9470045238731127e-05, + "loss": 0.2438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14706578850746155, + "step": 2745, + "valid_targets_mean": 5480.9, + "valid_targets_min": 3025 + }, + { + "epoch": 2.8680229525299947, + "grad_norm": 0.6536604992748987, + "learning_rate": 2.9424208085389544e-05, + "loss": 0.2555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13016174733638763, + "step": 2750, + "valid_targets_mean": 3307.9, + "valid_targets_min": 1434 + }, + { + "epoch": 2.873239436619718, + "grad_norm": 0.8084730346631612, + "learning_rate": 2.9378307213377603e-05, + "loss": 0.2461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12233893573284149, + "step": 2755, + "valid_targets_mean": 3271.1, + "valid_targets_min": 1074 + }, + { + "epoch": 2.878455920709442, + "grad_norm": 0.5955440345550854, + "learning_rate": 2.93323429330389e-05, + "loss": 0.2633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11668693274259567, + "step": 2760, + "valid_targets_mean": 3626.1, + "valid_targets_min": 1510 + }, + { + "epoch": 2.8836724047991655, + "grad_norm": 0.6011732583306233, + "learning_rate": 2.9286315555145718e-05, + "loss": 0.2545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13229070603847504, + "step": 2765, + "valid_targets_mean": 4041.6, + "valid_targets_min": 1885 + }, + { + "epoch": 2.888888888888889, + "grad_norm": 0.5918453420557539, + "learning_rate": 2.924022539089698e-05, + "loss": 0.262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12244449555873871, + "step": 2770, + "valid_targets_mean": 5077.0, + "valid_targets_min": 3139 + }, + { + "epoch": 2.894105372978612, + "grad_norm": 0.6349993371229355, + "learning_rate": 2.9194072751916106e-05, + "loss": 0.2534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14885400235652924, + "step": 2775, + "valid_targets_mean": 4221.8, + "valid_targets_min": 2598 + }, + { + "epoch": 2.899321857068336, + "grad_norm": 0.6243493274176214, + "learning_rate": 2.914785795024893e-05, + "loss": 0.2572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11400054395198822, + "step": 2780, + "valid_targets_mean": 3226.0, + "valid_targets_min": 1460 + }, + { + "epoch": 2.9045383411580596, + "grad_norm": 0.6296228981918444, + "learning_rate": 2.9101581298361563e-05, + "loss": 0.2583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14952462911605835, + "step": 2785, + "valid_targets_mean": 4045.0, + "valid_targets_min": 1084 + }, + { + "epoch": 2.909754825247783, + "grad_norm": 0.6099492599616824, + "learning_rate": 2.90552431091383e-05, + "loss": 0.2445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1244485080242157, + "step": 2790, + "valid_targets_mean": 3752.2, + "valid_targets_min": 1618 + }, + { + "epoch": 2.9149713093375063, + "grad_norm": 0.6791585182138435, + "learning_rate": 2.9008843695879508e-05, + "loss": 0.2426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1301327347755432, + "step": 2795, + "valid_targets_mean": 3573.1, + "valid_targets_min": 1193 + }, + { + "epoch": 2.92018779342723, + "grad_norm": 0.5751891695314546, + "learning_rate": 2.896238337229949e-05, + "loss": 0.2493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1292528212070465, + "step": 2800, + "valid_targets_mean": 4408.8, + "valid_targets_min": 2092 + }, + { + "epoch": 2.9254042775169538, + "grad_norm": 0.5656710966099417, + "learning_rate": 2.891586245252439e-05, + "loss": 0.2494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14530359208583832, + "step": 2805, + "valid_targets_mean": 4397.6, + "valid_targets_min": 2786 + }, + { + "epoch": 2.930620761606677, + "grad_norm": 0.7236482854491971, + "learning_rate": 2.886928125109003e-05, + "loss": 0.2419, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10000090301036835, + "step": 2810, + "valid_targets_mean": 4088.1, + "valid_targets_min": 1246 + }, + { + "epoch": 2.9358372456964004, + "grad_norm": 0.5863098043495419, + "learning_rate": 2.882264008293982e-05, + "loss": 0.2406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10833729803562164, + "step": 2815, + "valid_targets_mean": 3414.8, + "valid_targets_min": 1484 + }, + { + "epoch": 2.941053729786124, + "grad_norm": 0.5673965320356554, + "learning_rate": 2.8775939263422615e-05, + "loss": 0.2522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1349630206823349, + "step": 2820, + "valid_targets_mean": 4064.5, + "valid_targets_min": 1502 + }, + { + "epoch": 2.946270213875848, + "grad_norm": 0.7454699758241563, + "learning_rate": 2.872917910829059e-05, + "loss": 0.2466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11518590152263641, + "step": 2825, + "valid_targets_mean": 4174.1, + "valid_targets_min": 2134 + }, + { + "epoch": 2.951486697965571, + "grad_norm": 0.6792669942825563, + "learning_rate": 2.8682359933697075e-05, + "loss": 0.2723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12189282476902008, + "step": 2830, + "valid_targets_mean": 2993.4, + "valid_targets_min": 1312 + }, + { + "epoch": 2.9567031820552945, + "grad_norm": 0.5631340887456594, + "learning_rate": 2.863548205619447e-05, + "loss": 0.2487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1314758062362671, + "step": 2835, + "valid_targets_mean": 4827.2, + "valid_targets_min": 2738 + }, + { + "epoch": 2.9619196661450182, + "grad_norm": 0.603833383776037, + "learning_rate": 2.8588545792732056e-05, + "loss": 0.2469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12522263824939728, + "step": 2840, + "valid_targets_mean": 3916.9, + "valid_targets_min": 1759 + }, + { + "epoch": 2.967136150234742, + "grad_norm": 0.611802615806098, + "learning_rate": 2.8541551460653875e-05, + "loss": 0.245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11957676708698273, + "step": 2845, + "valid_targets_mean": 3551.9, + "valid_targets_min": 2155 + }, + { + "epoch": 2.9723526343244653, + "grad_norm": 0.7228629967918775, + "learning_rate": 2.8494499377696586e-05, + "loss": 0.2555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12652847170829773, + "step": 2850, + "valid_targets_mean": 3097.1, + "valid_targets_min": 2048 + }, + { + "epoch": 2.9775691184141886, + "grad_norm": 0.6317765793251472, + "learning_rate": 2.8447389861987295e-05, + "loss": 0.2528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10699822008609772, + "step": 2855, + "valid_targets_mean": 2883.6, + "valid_targets_min": 1233 + }, + { + "epoch": 2.9827856025039123, + "grad_norm": 0.54213287730834, + "learning_rate": 2.8400223232041456e-05, + "loss": 0.2528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10821182280778885, + "step": 2860, + "valid_targets_mean": 3688.1, + "valid_targets_min": 1383 + }, + { + "epoch": 2.988002086593636, + "grad_norm": 0.6007123848400694, + "learning_rate": 2.835299980676064e-05, + "loss": 0.2586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12923848628997803, + "step": 2865, + "valid_targets_mean": 4248.2, + "valid_targets_min": 1800 + }, + { + "epoch": 2.9932185706833594, + "grad_norm": 0.6205748175131593, + "learning_rate": 2.8305719905430442e-05, + "loss": 0.2507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11788716912269592, + "step": 2870, + "valid_targets_mean": 3262.4, + "valid_targets_min": 2327 + }, + { + "epoch": 2.9984350547730827, + "grad_norm": 0.6241517259904573, + "learning_rate": 2.8258383847718306e-05, + "loss": 0.2513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13496685028076172, + "step": 2875, + "valid_targets_mean": 3481.8, + "valid_targets_min": 1214 + }, + { + "epoch": 3.003129890453834, + "grad_norm": 0.44180514174907837, + "learning_rate": 2.821099195367135e-05, + "loss": 0.2008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08082979172468185, + "step": 2880, + "valid_targets_mean": 6116.6, + "valid_targets_min": 4858 + }, + { + "epoch": 3.0083463745435575, + "grad_norm": 0.44694906623562963, + "learning_rate": 2.8163544543714216e-05, + "loss": 0.1803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08725933730602264, + "step": 2885, + "valid_targets_mean": 6616.0, + "valid_targets_min": 4759 + }, + { + "epoch": 3.013562858633281, + "grad_norm": 0.44526606191032503, + "learning_rate": 2.811604193864689e-05, + "loss": 0.1696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08093272149562836, + "step": 2890, + "valid_targets_mean": 5258.1, + "valid_targets_min": 4373 + }, + { + "epoch": 3.0187793427230045, + "grad_norm": 0.5016676991593283, + "learning_rate": 2.806848445964255e-05, + "loss": 0.1739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09734293818473816, + "step": 2895, + "valid_targets_mean": 5849.6, + "valid_targets_min": 4813 + }, + { + "epoch": 3.0239958268127283, + "grad_norm": 0.4555904188317003, + "learning_rate": 2.8020872428245382e-05, + "loss": 0.1814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09488673508167267, + "step": 2900, + "valid_targets_mean": 5604.5, + "valid_targets_min": 3983 + }, + { + "epoch": 3.0292123109024516, + "grad_norm": 0.40849176087301287, + "learning_rate": 2.797320616636841e-05, + "loss": 0.1781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08584165573120117, + "step": 2905, + "valid_targets_mean": 6438.2, + "valid_targets_min": 5154 + }, + { + "epoch": 3.0344287949921753, + "grad_norm": 0.4240322353002282, + "learning_rate": 2.792548599629132e-05, + "loss": 0.185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0781969428062439, + "step": 2910, + "valid_targets_mean": 5509.6, + "valid_targets_min": 5040 + }, + { + "epoch": 3.0396452790818986, + "grad_norm": 0.47734354193593637, + "learning_rate": 2.787771224065829e-05, + "loss": 0.1731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12022747099399567, + "step": 2915, + "valid_targets_mean": 6410.5, + "valid_targets_min": 5066 + }, + { + "epoch": 3.0448617631716224, + "grad_norm": 0.4298442155405438, + "learning_rate": 2.782988522247578e-05, + "loss": 0.1948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0786404013633728, + "step": 2920, + "valid_targets_mean": 6616.2, + "valid_targets_min": 4500 + }, + { + "epoch": 3.0500782472613457, + "grad_norm": 0.406705416089344, + "learning_rate": 2.7782005265110375e-05, + "loss": 0.177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10133657604455948, + "step": 2925, + "valid_targets_mean": 7507.6, + "valid_targets_min": 5455 + }, + { + "epoch": 3.0552947313510694, + "grad_norm": 0.4220750997761556, + "learning_rate": 2.7734072692286604e-05, + "loss": 0.1805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09893888235092163, + "step": 2930, + "valid_targets_mean": 7383.8, + "valid_targets_min": 4715 + }, + { + "epoch": 3.0605112154407927, + "grad_norm": 0.3986150581083876, + "learning_rate": 2.768608782808472e-05, + "loss": 0.1655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08655507117509842, + "step": 2935, + "valid_targets_mean": 6624.9, + "valid_targets_min": 5315 + }, + { + "epoch": 3.0657276995305165, + "grad_norm": 0.435070490768091, + "learning_rate": 2.763805099693854e-05, + "loss": 0.1792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09388799965381622, + "step": 2940, + "valid_targets_mean": 7394.5, + "valid_targets_min": 4353 + }, + { + "epoch": 3.07094418362024, + "grad_norm": 0.46136458302199346, + "learning_rate": 2.7589962523633218e-05, + "loss": 0.1613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08169777691364288, + "step": 2945, + "valid_targets_mean": 6296.0, + "valid_targets_min": 4890 + }, + { + "epoch": 3.0761606677099635, + "grad_norm": 0.4496191721401737, + "learning_rate": 2.75418227333031e-05, + "loss": 0.1612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07177402079105377, + "step": 2950, + "valid_targets_mean": 5848.5, + "valid_targets_min": 4560 + }, + { + "epoch": 3.081377151799687, + "grad_norm": 0.46522372936912193, + "learning_rate": 2.749363195142947e-05, + "loss": 0.1786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09124190360307693, + "step": 2955, + "valid_targets_mean": 5550.9, + "valid_targets_min": 3640 + }, + { + "epoch": 3.0865936358894106, + "grad_norm": 0.5178878026707332, + "learning_rate": 2.744539050383838e-05, + "loss": 0.1767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09886413812637329, + "step": 2960, + "valid_targets_mean": 5894.4, + "valid_targets_min": 3967 + }, + { + "epoch": 3.091810119979134, + "grad_norm": 0.4665945749236003, + "learning_rate": 2.7397098716698463e-05, + "loss": 0.1536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08168807625770569, + "step": 2965, + "valid_targets_mean": 6153.9, + "valid_targets_min": 4521 + }, + { + "epoch": 3.0970266040688577, + "grad_norm": 0.4321043669551146, + "learning_rate": 2.7348756916518663e-05, + "loss": 0.1598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0813293531537056, + "step": 2970, + "valid_targets_mean": 6440.5, + "valid_targets_min": 5067 + }, + { + "epoch": 3.102243088158581, + "grad_norm": 0.5451863365319071, + "learning_rate": 2.7300365430146117e-05, + "loss": 0.1631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07739552855491638, + "step": 2975, + "valid_targets_mean": 6049.2, + "valid_targets_min": 4385 + }, + { + "epoch": 3.1074595722483047, + "grad_norm": 0.3687652847212978, + "learning_rate": 2.7251924584763867e-05, + "loss": 0.1882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07982498407363892, + "step": 2980, + "valid_targets_mean": 7771.6, + "valid_targets_min": 4566 + }, + { + "epoch": 3.112676056338028, + "grad_norm": 0.3856559212500589, + "learning_rate": 2.7203434707888684e-05, + "loss": 0.1713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08336256444454193, + "step": 2985, + "valid_targets_mean": 6633.6, + "valid_targets_min": 5067 + }, + { + "epoch": 3.1178925404277518, + "grad_norm": 0.4106217915396555, + "learning_rate": 2.7154896127368872e-05, + "loss": 0.1691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08375629782676697, + "step": 2990, + "valid_targets_mean": 6862.6, + "valid_targets_min": 5178 + }, + { + "epoch": 3.123109024517475, + "grad_norm": 0.3798005746927241, + "learning_rate": 2.7106309171381995e-05, + "loss": 0.1796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08653640747070312, + "step": 2995, + "valid_targets_mean": 6075.1, + "valid_targets_min": 3854 + }, + { + "epoch": 3.128325508607199, + "grad_norm": 0.4074884618970528, + "learning_rate": 2.705767416843272e-05, + "loss": 0.1962, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10451126098632812, + "step": 3000, + "valid_targets_mean": 6840.1, + "valid_targets_min": 4743 + }, + { + "epoch": 3.133541992696922, + "grad_norm": 0.4478594819683904, + "learning_rate": 2.700899144735055e-05, + "loss": 0.2081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10994894802570343, + "step": 3005, + "valid_targets_mean": 6480.9, + "valid_targets_min": 4761 + }, + { + "epoch": 3.138758476786646, + "grad_norm": 0.43611762359508544, + "learning_rate": 2.696026133728763e-05, + "loss": 0.1788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08234615623950958, + "step": 3010, + "valid_targets_mean": 6224.5, + "valid_targets_min": 4789 + }, + { + "epoch": 3.143974960876369, + "grad_norm": 0.41932174443787257, + "learning_rate": 2.69114841677165e-05, + "loss": 0.1798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08948376029729843, + "step": 3015, + "valid_targets_mean": 6686.4, + "valid_targets_min": 5403 + }, + { + "epoch": 3.149191444966093, + "grad_norm": 0.428196972381538, + "learning_rate": 2.6862660268427885e-05, + "loss": 0.1746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08405810594558716, + "step": 3020, + "valid_targets_mean": 6285.0, + "valid_targets_min": 4970 + }, + { + "epoch": 3.1544079290558162, + "grad_norm": 0.4218355906982827, + "learning_rate": 2.6813789969528454e-05, + "loss": 0.1829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09993162751197815, + "step": 3025, + "valid_targets_mean": 6926.9, + "valid_targets_min": 5299 + }, + { + "epoch": 3.15962441314554, + "grad_norm": 0.4204570828384931, + "learning_rate": 2.6764873601438588e-05, + "loss": 0.1727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08472225815057755, + "step": 3030, + "valid_targets_mean": 6121.8, + "valid_targets_min": 4951 + }, + { + "epoch": 3.1648408972352633, + "grad_norm": 0.4036763638948504, + "learning_rate": 2.6715911494890163e-05, + "loss": 0.1818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09032890200614929, + "step": 3035, + "valid_targets_mean": 8113.1, + "valid_targets_min": 4794 + }, + { + "epoch": 3.170057381324987, + "grad_norm": 0.43671727368596946, + "learning_rate": 2.6666903980924284e-05, + "loss": 0.1821, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09794063866138458, + "step": 3040, + "valid_targets_mean": 5281.6, + "valid_targets_min": 593 + }, + { + "epoch": 3.1752738654147104, + "grad_norm": 0.3954682133922885, + "learning_rate": 2.6617851390889074e-05, + "loss": 0.1737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07609634101390839, + "step": 3045, + "valid_targets_mean": 7339.1, + "valid_targets_min": 4595 + }, + { + "epoch": 3.180490349504434, + "grad_norm": 0.3976362192003204, + "learning_rate": 2.6568754056437412e-05, + "loss": 0.1556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07882060110569, + "step": 3050, + "valid_targets_mean": 6674.0, + "valid_targets_min": 5411 + }, + { + "epoch": 3.1857068335941574, + "grad_norm": 0.43244911111039364, + "learning_rate": 2.6519612309524727e-05, + "loss": 0.1705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08966496586799622, + "step": 3055, + "valid_targets_mean": 6596.2, + "valid_targets_min": 4451 + }, + { + "epoch": 3.190923317683881, + "grad_norm": 0.4265039411607974, + "learning_rate": 2.6470426482406688e-05, + "loss": 0.183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09334418177604675, + "step": 3060, + "valid_targets_mean": 7159.2, + "valid_targets_min": 5568 + }, + { + "epoch": 3.1961398017736045, + "grad_norm": 0.41758210797544837, + "learning_rate": 2.6421196907637036e-05, + "loss": 0.1804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10056646168231964, + "step": 3065, + "valid_targets_mean": 6923.1, + "valid_targets_min": 4839 + }, + { + "epoch": 3.201356285863328, + "grad_norm": 0.45619739783181074, + "learning_rate": 2.6371923918065273e-05, + "loss": 0.1698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07980535179376602, + "step": 3070, + "valid_targets_mean": 4921.4, + "valid_targets_min": 3517 + }, + { + "epoch": 3.2065727699530515, + "grad_norm": 0.48059075142736524, + "learning_rate": 2.6322607846834444e-05, + "loss": 0.1644, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07865060865879059, + "step": 3075, + "valid_targets_mean": 5929.9, + "valid_targets_min": 3551 + }, + { + "epoch": 3.2117892540427753, + "grad_norm": 0.47591754034029016, + "learning_rate": 2.6273249027378878e-05, + "loss": 0.1743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07587525248527527, + "step": 3080, + "valid_targets_mean": 5854.9, + "valid_targets_min": 4614 + }, + { + "epoch": 3.2170057381324986, + "grad_norm": 0.4709775016994486, + "learning_rate": 2.6223847793421938e-05, + "loss": 0.1797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09627166390419006, + "step": 3085, + "valid_targets_mean": 6779.1, + "valid_targets_min": 5050 + }, + { + "epoch": 3.2222222222222223, + "grad_norm": 0.4297455027534991, + "learning_rate": 2.6174404478973746e-05, + "loss": 0.1687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08504495024681091, + "step": 3090, + "valid_targets_mean": 5993.4, + "valid_targets_min": 4734 + }, + { + "epoch": 3.2274387063119456, + "grad_norm": 0.4087393103967894, + "learning_rate": 2.612491941832894e-05, + "loss": 0.1677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08444757759571075, + "step": 3095, + "valid_targets_mean": 6826.9, + "valid_targets_min": 4966 + }, + { + "epoch": 3.2326551904016694, + "grad_norm": 0.3956807015631005, + "learning_rate": 2.6075392946064417e-05, + "loss": 0.1861, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07641545683145523, + "step": 3100, + "valid_targets_mean": 6366.5, + "valid_targets_min": 4510 + }, + { + "epoch": 3.2378716744913927, + "grad_norm": 0.415527848273328, + "learning_rate": 2.6025825397037057e-05, + "loss": 0.153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.067385233938694, + "step": 3105, + "valid_targets_mean": 5248.9, + "valid_targets_min": 4430 + }, + { + "epoch": 3.2430881585811164, + "grad_norm": 0.3963375526633729, + "learning_rate": 2.5976217106381477e-05, + "loss": 0.149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07113958150148392, + "step": 3110, + "valid_targets_mean": 5854.6, + "valid_targets_min": 4701 + }, + { + "epoch": 3.2483046426708397, + "grad_norm": 0.40445715265302534, + "learning_rate": 2.5926568409507754e-05, + "loss": 0.1578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08333644270896912, + "step": 3115, + "valid_targets_mean": 6520.2, + "valid_targets_min": 4501 + }, + { + "epoch": 3.2535211267605635, + "grad_norm": 0.4010524436484236, + "learning_rate": 2.587687964209914e-05, + "loss": 0.1624, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08973240852355957, + "step": 3120, + "valid_targets_mean": 6796.4, + "valid_targets_min": 3640 + }, + { + "epoch": 3.258737610850287, + "grad_norm": 0.4331254246430217, + "learning_rate": 2.5827151140109836e-05, + "loss": 0.1823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08798360824584961, + "step": 3125, + "valid_targets_mean": 6024.8, + "valid_targets_min": 5024 + }, + { + "epoch": 3.2639540949400105, + "grad_norm": 0.45647271568951914, + "learning_rate": 2.5777383239762676e-05, + "loss": 0.2065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09865845739841461, + "step": 3130, + "valid_targets_mean": 6549.4, + "valid_targets_min": 4715 + }, + { + "epoch": 3.269170579029734, + "grad_norm": 0.4377846379029583, + "learning_rate": 2.5727576277546888e-05, + "loss": 0.1755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0739026814699173, + "step": 3135, + "valid_targets_mean": 5681.1, + "valid_targets_min": 4730 + }, + { + "epoch": 3.2743870631194576, + "grad_norm": 0.4524801269437676, + "learning_rate": 2.5677730590215792e-05, + "loss": 0.1699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08780796080827713, + "step": 3140, + "valid_targets_mean": 5787.6, + "valid_targets_min": 4657 + }, + { + "epoch": 3.279603547209181, + "grad_norm": 0.42016246709909255, + "learning_rate": 2.5627846514784553e-05, + "loss": 0.1709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08939069509506226, + "step": 3145, + "valid_targets_mean": 6862.9, + "valid_targets_min": 5599 + }, + { + "epoch": 3.2848200312989047, + "grad_norm": 0.4293568568631133, + "learning_rate": 2.5577924388527847e-05, + "loss": 0.1869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11343889683485031, + "step": 3150, + "valid_targets_mean": 6471.2, + "valid_targets_min": 4948 + }, + { + "epoch": 3.290036515388628, + "grad_norm": 0.4564259499206169, + "learning_rate": 2.552796454897766e-05, + "loss": 0.1819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09474968910217285, + "step": 3155, + "valid_targets_mean": 7644.9, + "valid_targets_min": 4876 + }, + { + "epoch": 3.2952529994783517, + "grad_norm": 0.4110674827429929, + "learning_rate": 2.5477967333920942e-05, + "loss": 0.1732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07394257187843323, + "step": 3160, + "valid_targets_mean": 5750.1, + "valid_targets_min": 3599 + }, + { + "epoch": 3.300469483568075, + "grad_norm": 0.40915595241571634, + "learning_rate": 2.5427933081397357e-05, + "loss": 0.1813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10283771902322769, + "step": 3165, + "valid_targets_mean": 7335.2, + "valid_targets_min": 4800 + }, + { + "epoch": 3.3056859676577988, + "grad_norm": 0.5398867538381563, + "learning_rate": 2.5377862129696966e-05, + "loss": 0.1561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06837309896945953, + "step": 3170, + "valid_targets_mean": 7088.9, + "valid_targets_min": 4228 + }, + { + "epoch": 3.310902451747522, + "grad_norm": 0.43203435551312236, + "learning_rate": 2.5327754817357974e-05, + "loss": 0.1691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08902721107006073, + "step": 3175, + "valid_targets_mean": 6421.2, + "valid_targets_min": 5314 + }, + { + "epoch": 3.316118935837246, + "grad_norm": 0.3976187779779871, + "learning_rate": 2.527761148316443e-05, + "loss": 0.2168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08306819200515747, + "step": 3180, + "valid_targets_mean": 6345.4, + "valid_targets_min": 4634 + }, + { + "epoch": 3.321335419926969, + "grad_norm": 0.3894797317943454, + "learning_rate": 2.522743246614392e-05, + "loss": 0.1631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07459355890750885, + "step": 3185, + "valid_targets_mean": 5758.2, + "valid_targets_min": 4521 + }, + { + "epoch": 3.326551904016693, + "grad_norm": 0.41325686939609974, + "learning_rate": 2.5177218105565306e-05, + "loss": 0.1701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08164391666650772, + "step": 3190, + "valid_targets_mean": 5346.1, + "valid_targets_min": 4180 + }, + { + "epoch": 3.331768388106416, + "grad_norm": 0.386571458619822, + "learning_rate": 2.5126968740936386e-05, + "loss": 0.174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09173017740249634, + "step": 3195, + "valid_targets_mean": 7561.1, + "valid_targets_min": 4791 + }, + { + "epoch": 3.33698487219614, + "grad_norm": 0.4489109126443259, + "learning_rate": 2.5076684712001655e-05, + "loss": 0.1723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07875959575176239, + "step": 3200, + "valid_targets_mean": 6135.1, + "valid_targets_min": 4670 + }, + { + "epoch": 3.3422013562858632, + "grad_norm": 0.44877691106254824, + "learning_rate": 2.5026366358739963e-05, + "loss": 0.1538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08880254626274109, + "step": 3205, + "valid_targets_mean": 6526.8, + "valid_targets_min": 4806 + }, + { + "epoch": 3.347417840375587, + "grad_norm": 0.43644973967489004, + "learning_rate": 2.4976014021362236e-05, + "loss": 0.1648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07076229155063629, + "step": 3210, + "valid_targets_mean": 5486.9, + "valid_targets_min": 4453 + }, + { + "epoch": 3.3526343244653103, + "grad_norm": 0.4042077900929801, + "learning_rate": 2.492562804030918e-05, + "loss": 0.1614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09146247804164886, + "step": 3215, + "valid_targets_mean": 7182.4, + "valid_targets_min": 5687 + }, + { + "epoch": 3.357850808555034, + "grad_norm": 0.4039407900391138, + "learning_rate": 2.487520875624895e-05, + "loss": 0.1568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07359585911035538, + "step": 3220, + "valid_targets_mean": 6945.2, + "valid_targets_min": 4964 + }, + { + "epoch": 3.3630672926447573, + "grad_norm": 0.3787376222854076, + "learning_rate": 2.48247565100749e-05, + "loss": 0.1651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08290009200572968, + "step": 3225, + "valid_targets_mean": 7281.2, + "valid_targets_min": 5625 + }, + { + "epoch": 3.368283776734481, + "grad_norm": 0.38039714041533085, + "learning_rate": 2.4774271642903213e-05, + "loss": 0.1699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07531018555164337, + "step": 3230, + "valid_targets_mean": 6706.9, + "valid_targets_min": 5006 + }, + { + "epoch": 3.3735002608242044, + "grad_norm": 0.3889722787046288, + "learning_rate": 2.4723754496070652e-05, + "loss": 0.1577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08490554988384247, + "step": 3235, + "valid_targets_mean": 6947.8, + "valid_targets_min": 5252 + }, + { + "epoch": 3.378716744913928, + "grad_norm": 0.42227768230291607, + "learning_rate": 2.4673205411132218e-05, + "loss": 0.1718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08523184806108475, + "step": 3240, + "valid_targets_mean": 6373.1, + "valid_targets_min": 5071 + }, + { + "epoch": 3.3839332290036515, + "grad_norm": 0.4276453799405262, + "learning_rate": 2.462262472985886e-05, + "loss": 0.1731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09290973097085953, + "step": 3245, + "valid_targets_mean": 6664.0, + "valid_targets_min": 3440 + }, + { + "epoch": 3.389149713093375, + "grad_norm": 0.4843703303482478, + "learning_rate": 2.457201279423514e-05, + "loss": 0.1829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09049367904663086, + "step": 3250, + "valid_targets_mean": 4595.1, + "valid_targets_min": 1320 + }, + { + "epoch": 3.3943661971830985, + "grad_norm": 0.49447685708845535, + "learning_rate": 2.4521369946456954e-05, + "loss": 0.2566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13539639115333557, + "step": 3255, + "valid_targets_mean": 6093.6, + "valid_targets_min": 746 + }, + { + "epoch": 3.3995826812728223, + "grad_norm": 0.47341813306130065, + "learning_rate": 2.447069652892918e-05, + "loss": 0.2471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12410315871238708, + "step": 3260, + "valid_targets_mean": 5458.9, + "valid_targets_min": 1846 + }, + { + "epoch": 3.4047991653625456, + "grad_norm": 0.5218632781010671, + "learning_rate": 2.4419992884263404e-05, + "loss": 0.2371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08286994695663452, + "step": 3265, + "valid_targets_mean": 4571.6, + "valid_targets_min": 1321 + }, + { + "epoch": 3.4100156494522693, + "grad_norm": 0.550674517568542, + "learning_rate": 2.436925935527557e-05, + "loss": 0.244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12660722434520721, + "step": 3270, + "valid_targets_mean": 5429.1, + "valid_targets_min": 2373 + }, + { + "epoch": 3.4152321335419926, + "grad_norm": 1.9974588070597539, + "learning_rate": 2.4318496284983676e-05, + "loss": 0.2529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12181459367275238, + "step": 3275, + "valid_targets_mean": 4797.8, + "valid_targets_min": 2075 + }, + { + "epoch": 3.4204486176317164, + "grad_norm": 0.5595684644018506, + "learning_rate": 2.4267704016605453e-05, + "loss": 0.248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12632699310779572, + "step": 3280, + "valid_targets_mean": 5345.5, + "valid_targets_min": 1089 + }, + { + "epoch": 3.4256651017214397, + "grad_norm": 0.5938237896691158, + "learning_rate": 2.421688289355606e-05, + "loss": 0.2427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12271217256784439, + "step": 3285, + "valid_targets_mean": 3780.1, + "valid_targets_min": 2044 + }, + { + "epoch": 3.4308815858111634, + "grad_norm": 0.5550593436012659, + "learning_rate": 2.4166033259445723e-05, + "loss": 0.2473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11569850146770477, + "step": 3290, + "valid_targets_mean": 3887.6, + "valid_targets_min": 1170 + }, + { + "epoch": 3.4360980699008867, + "grad_norm": 0.5564035018298186, + "learning_rate": 2.4115155458077453e-05, + "loss": 0.2473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10417018830776215, + "step": 3295, + "valid_targets_mean": 4304.1, + "valid_targets_min": 1241 + }, + { + "epoch": 3.4413145539906105, + "grad_norm": 0.5440160556521736, + "learning_rate": 2.4064249833444702e-05, + "loss": 0.245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12575963139533997, + "step": 3300, + "valid_targets_mean": 4551.2, + "valid_targets_min": 2747 + }, + { + "epoch": 3.446531038080334, + "grad_norm": 0.5056956524145735, + "learning_rate": 2.401331672972904e-05, + "loss": 0.2425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10744407773017883, + "step": 3305, + "valid_targets_mean": 5571.5, + "valid_targets_min": 2144 + }, + { + "epoch": 3.4517475221700575, + "grad_norm": 0.57945528219038, + "learning_rate": 2.3962356491297814e-05, + "loss": 0.2422, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11964833736419678, + "step": 3310, + "valid_targets_mean": 4508.5, + "valid_targets_min": 2587 + }, + { + "epoch": 3.456964006259781, + "grad_norm": 0.5282644268385173, + "learning_rate": 2.391136946270186e-05, + "loss": 0.2478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11077821254730225, + "step": 3315, + "valid_targets_mean": 4478.0, + "valid_targets_min": 1366 + }, + { + "epoch": 3.4621804903495046, + "grad_norm": 0.6142526811163147, + "learning_rate": 2.3860355988673122e-05, + "loss": 0.241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1114354133605957, + "step": 3320, + "valid_targets_mean": 3714.5, + "valid_targets_min": 1884 + }, + { + "epoch": 3.467396974439228, + "grad_norm": 0.6434862436010371, + "learning_rate": 2.380931641412236e-05, + "loss": 0.2519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11873886734247208, + "step": 3325, + "valid_targets_mean": 3043.0, + "valid_targets_min": 746 + }, + { + "epoch": 3.4726134585289516, + "grad_norm": 0.6125036123314246, + "learning_rate": 2.3758251084136794e-05, + "loss": 0.2317, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11632031947374344, + "step": 3330, + "valid_targets_mean": 3548.1, + "valid_targets_min": 1538 + }, + { + "epoch": 3.477829942618675, + "grad_norm": 0.6079487705963257, + "learning_rate": 2.3707160343977792e-05, + "loss": 0.2409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10519608855247498, + "step": 3335, + "valid_targets_mean": 3421.4, + "valid_targets_min": 1451 + }, + { + "epoch": 3.4830464267083987, + "grad_norm": 0.6429589255944179, + "learning_rate": 2.3656044539078523e-05, + "loss": 0.2591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12720975279808044, + "step": 3340, + "valid_targets_mean": 3797.6, + "valid_targets_min": 2227 + }, + { + "epoch": 3.488262910798122, + "grad_norm": 0.5931494023245428, + "learning_rate": 2.3604904015041617e-05, + "loss": 0.249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10338112711906433, + "step": 3345, + "valid_targets_mean": 3432.5, + "valid_targets_min": 1323 + }, + { + "epoch": 3.4934793948878458, + "grad_norm": 0.6605675164932167, + "learning_rate": 2.3553739117636835e-05, + "loss": 0.2443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1653464138507843, + "step": 3350, + "valid_targets_mean": 4970.6, + "valid_targets_min": 1921 + }, + { + "epoch": 3.498695878977569, + "grad_norm": 0.572184450489533, + "learning_rate": 2.3502550192798726e-05, + "loss": 0.2301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10803180932998657, + "step": 3355, + "valid_targets_mean": 4261.2, + "valid_targets_min": 2886 + }, + { + "epoch": 3.5039123630672924, + "grad_norm": 0.609667696840384, + "learning_rate": 2.345133758662431e-05, + "loss": 0.2423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.135938361287117, + "step": 3360, + "valid_targets_mean": 4403.1, + "valid_targets_min": 2828 + }, + { + "epoch": 3.509128847157016, + "grad_norm": 0.6518300910360599, + "learning_rate": 2.3400101645370702e-05, + "loss": 0.2222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12049508094787598, + "step": 3365, + "valid_targets_mean": 3175.6, + "valid_targets_min": 1631 + }, + { + "epoch": 3.51434533124674, + "grad_norm": 0.6462530774764337, + "learning_rate": 2.3348842715452803e-05, + "loss": 0.2428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1236698105931282, + "step": 3370, + "valid_targets_mean": 3826.5, + "valid_targets_min": 2606 + }, + { + "epoch": 3.519561815336463, + "grad_norm": 0.5864694586174704, + "learning_rate": 2.3297561143440932e-05, + "loss": 0.2388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10081040859222412, + "step": 3375, + "valid_targets_mean": 3574.1, + "valid_targets_min": 1475 + }, + { + "epoch": 3.5247782994261865, + "grad_norm": 0.5747657013944781, + "learning_rate": 2.3246257276058507e-05, + "loss": 0.2362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09470266103744507, + "step": 3380, + "valid_targets_mean": 3230.2, + "valid_targets_min": 648 + }, + { + "epoch": 3.5299947835159102, + "grad_norm": 0.6159945121829697, + "learning_rate": 2.3194931460179677e-05, + "loss": 0.2442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1319727897644043, + "step": 3385, + "valid_targets_mean": 4009.9, + "valid_targets_min": 2511 + }, + { + "epoch": 3.535211267605634, + "grad_norm": 0.6857031713431712, + "learning_rate": 2.314358404282699e-05, + "loss": 0.2249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10448281466960907, + "step": 3390, + "valid_targets_mean": 3089.0, + "valid_targets_min": 2831 + }, + { + "epoch": 3.5404277516953573, + "grad_norm": 0.6491219562499029, + "learning_rate": 2.309221537116906e-05, + "loss": 0.239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11855064332485199, + "step": 3395, + "valid_targets_mean": 3622.1, + "valid_targets_min": 1825 + }, + { + "epoch": 3.5456442357850806, + "grad_norm": 0.6700500164258185, + "learning_rate": 2.3040825792518188e-05, + "loss": 0.2318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11624139547348022, + "step": 3400, + "valid_targets_mean": 3218.0, + "valid_targets_min": 1108 + }, + { + "epoch": 3.5508607198748043, + "grad_norm": 0.7057109000859215, + "learning_rate": 2.298941565432804e-05, + "loss": 0.2324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0822981521487236, + "step": 3405, + "valid_targets_mean": 2481.9, + "valid_targets_min": 1501 + }, + { + "epoch": 3.556077203964528, + "grad_norm": 0.6835437421630167, + "learning_rate": 2.2937985304191285e-05, + "loss": 0.2323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13851001858711243, + "step": 3410, + "valid_targets_mean": 3836.0, + "valid_targets_min": 1828 + }, + { + "epoch": 3.5612936880542514, + "grad_norm": 0.5913739977791005, + "learning_rate": 2.2886535089837247e-05, + "loss": 0.2364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12443605065345764, + "step": 3415, + "valid_targets_mean": 4457.2, + "valid_targets_min": 1716 + }, + { + "epoch": 3.5665101721439747, + "grad_norm": 0.7149911052016908, + "learning_rate": 2.2835065359129564e-05, + "loss": 0.2314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10614100098609924, + "step": 3420, + "valid_targets_mean": 2995.9, + "valid_targets_min": 1281 + }, + { + "epoch": 3.5717266562336984, + "grad_norm": 0.6411076114814623, + "learning_rate": 2.278357646006381e-05, + "loss": 0.2402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09647589921951294, + "step": 3425, + "valid_targets_mean": 2898.4, + "valid_targets_min": 1713 + }, + { + "epoch": 3.576943140323422, + "grad_norm": 0.6101535296826472, + "learning_rate": 2.273206874076518e-05, + "loss": 0.2436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10247083753347397, + "step": 3430, + "valid_targets_mean": 3753.6, + "valid_targets_min": 1727 + }, + { + "epoch": 3.5821596244131455, + "grad_norm": 0.6108836056440757, + "learning_rate": 2.26805425494861e-05, + "loss": 0.2282, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1230870932340622, + "step": 3435, + "valid_targets_mean": 4313.0, + "valid_targets_min": 1965 + }, + { + "epoch": 3.587376108502869, + "grad_norm": 0.601919420197728, + "learning_rate": 2.26289982346039e-05, + "loss": 0.2324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10972052812576294, + "step": 3440, + "valid_targets_mean": 3430.1, + "valid_targets_min": 1184 + }, + { + "epoch": 3.5925925925925926, + "grad_norm": 0.5950604934200117, + "learning_rate": 2.2577436144618438e-05, + "loss": 0.2377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11043146252632141, + "step": 3445, + "valid_targets_mean": 4116.4, + "valid_targets_min": 2490 + }, + { + "epoch": 3.5978090766823163, + "grad_norm": 0.6396827974042852, + "learning_rate": 2.2525856628149754e-05, + "loss": 0.2442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12181945890188217, + "step": 3450, + "valid_targets_mean": 3019.1, + "valid_targets_min": 2109 + }, + { + "epoch": 3.6030255607720396, + "grad_norm": 0.7490350218832177, + "learning_rate": 2.2474260033935718e-05, + "loss": 0.23, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13523289561271667, + "step": 3455, + "valid_targets_mean": 3152.6, + "valid_targets_min": 1648 + }, + { + "epoch": 3.608242044861763, + "grad_norm": 0.6453758541573309, + "learning_rate": 2.2422646710829653e-05, + "loss": 0.2384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12171995639801025, + "step": 3460, + "valid_targets_mean": 3639.5, + "valid_targets_min": 1580 + }, + { + "epoch": 3.6134585289514867, + "grad_norm": 0.6265329336355708, + "learning_rate": 2.2371017007798005e-05, + "loss": 0.2329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11353909969329834, + "step": 3465, + "valid_targets_mean": 3558.6, + "valid_targets_min": 1577 + }, + { + "epoch": 3.6186750130412104, + "grad_norm": 0.535808820190994, + "learning_rate": 2.231937127391795e-05, + "loss": 0.2279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10289524495601654, + "step": 3470, + "valid_targets_mean": 4494.1, + "valid_targets_min": 2095 + }, + { + "epoch": 3.6238914971309337, + "grad_norm": 0.6222430983851605, + "learning_rate": 2.226770985837507e-05, + "loss": 0.2294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09932754933834076, + "step": 3475, + "valid_targets_mean": 3304.9, + "valid_targets_min": 1673 + }, + { + "epoch": 3.629107981220657, + "grad_norm": 0.580786669511027, + "learning_rate": 2.2216033110460952e-05, + "loss": 0.2413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12156185507774353, + "step": 3480, + "valid_targets_mean": 4571.5, + "valid_targets_min": 2494 + }, + { + "epoch": 3.634324465310381, + "grad_norm": 0.6176732988144921, + "learning_rate": 2.216434137957087e-05, + "loss": 0.232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10464446246623993, + "step": 3485, + "valid_targets_mean": 3386.9, + "valid_targets_min": 742 + }, + { + "epoch": 3.6395409494001045, + "grad_norm": 0.6378445830576676, + "learning_rate": 2.2112635015201383e-05, + "loss": 0.2414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1168811246752739, + "step": 3490, + "valid_targets_mean": 3257.8, + "valid_targets_min": 1766 + }, + { + "epoch": 3.644757433489828, + "grad_norm": 0.6157387330348644, + "learning_rate": 2.2060914366948004e-05, + "loss": 0.2297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11369498819112778, + "step": 3495, + "valid_targets_mean": 4264.8, + "valid_targets_min": 1504 + }, + { + "epoch": 3.649973917579551, + "grad_norm": 0.5924316164847073, + "learning_rate": 2.2009179784502802e-05, + "loss": 0.2402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.121125727891922, + "step": 3500, + "valid_targets_mean": 4329.9, + "valid_targets_min": 2203 + }, + { + "epoch": 3.655190401669275, + "grad_norm": 0.6404002992356811, + "learning_rate": 2.1957431617652074e-05, + "loss": 0.2289, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11320274323225021, + "step": 3505, + "valid_targets_mean": 2942.0, + "valid_targets_min": 1955 + }, + { + "epoch": 3.6604068857589986, + "grad_norm": 0.6617084629972766, + "learning_rate": 2.1905670216273967e-05, + "loss": 0.237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10087624192237854, + "step": 3510, + "valid_targets_mean": 3146.5, + "valid_targets_min": 1221 + }, + { + "epoch": 3.665623369848722, + "grad_norm": 0.6075614320816415, + "learning_rate": 2.185389593033609e-05, + "loss": 0.2241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09192607551813126, + "step": 3515, + "valid_targets_mean": 3952.9, + "valid_targets_min": 2506 + }, + { + "epoch": 3.6708398539384453, + "grad_norm": 0.5972528923230254, + "learning_rate": 2.1802109109893184e-05, + "loss": 0.2202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10351152718067169, + "step": 3520, + "valid_targets_mean": 4308.6, + "valid_targets_min": 2526 + }, + { + "epoch": 3.676056338028169, + "grad_norm": 0.6498529192718818, + "learning_rate": 2.1750310105084725e-05, + "loss": 0.22, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12835030257701874, + "step": 3525, + "valid_targets_mean": 3933.8, + "valid_targets_min": 464 + }, + { + "epoch": 3.6812728221178928, + "grad_norm": 0.6801302208644183, + "learning_rate": 2.1698499266132585e-05, + "loss": 0.238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1157878041267395, + "step": 3530, + "valid_targets_mean": 3871.4, + "valid_targets_min": 1003 + }, + { + "epoch": 3.686489306207616, + "grad_norm": 0.6326322401002196, + "learning_rate": 2.164667694333863e-05, + "loss": 0.2322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12699203193187714, + "step": 3535, + "valid_targets_mean": 3440.4, + "valid_targets_min": 1446 + }, + { + "epoch": 3.6917057902973394, + "grad_norm": 0.6053551433510863, + "learning_rate": 2.159484348708239e-05, + "loss": 0.2362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.109659843146801, + "step": 3540, + "valid_targets_mean": 3742.8, + "valid_targets_min": 1969 + }, + { + "epoch": 3.696922274387063, + "grad_norm": 0.5908162508368314, + "learning_rate": 2.1542999247818653e-05, + "loss": 0.2283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10403618216514587, + "step": 3545, + "valid_targets_mean": 3731.5, + "valid_targets_min": 1460 + }, + { + "epoch": 3.702138758476787, + "grad_norm": 0.6036526598056554, + "learning_rate": 2.149114457607512e-05, + "loss": 0.2409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10332241654396057, + "step": 3550, + "valid_targets_mean": 3264.1, + "valid_targets_min": 1172 + }, + { + "epoch": 3.70735524256651, + "grad_norm": 0.6085527473946052, + "learning_rate": 2.1439279822450034e-05, + "loss": 0.2328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11266172677278519, + "step": 3555, + "valid_targets_mean": 3995.9, + "valid_targets_min": 1818 + }, + { + "epoch": 3.7125717266562335, + "grad_norm": 0.5864304454335394, + "learning_rate": 2.1387405337609787e-05, + "loss": 0.2246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11255607008934021, + "step": 3560, + "valid_targets_mean": 3900.1, + "valid_targets_min": 1535 + }, + { + "epoch": 3.7177882107459572, + "grad_norm": 0.6175610484267877, + "learning_rate": 2.1335521472286578e-05, + "loss": 0.2267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1414259672164917, + "step": 3565, + "valid_targets_mean": 4962.2, + "valid_targets_min": 1814 + }, + { + "epoch": 3.723004694835681, + "grad_norm": 0.6933781442781956, + "learning_rate": 2.1283628577276034e-05, + "loss": 0.2202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11218614876270294, + "step": 3570, + "valid_targets_mean": 2853.1, + "valid_targets_min": 1073 + }, + { + "epoch": 3.7282211789254043, + "grad_norm": 0.6037281725945689, + "learning_rate": 2.1231727003434816e-05, + "loss": 0.2205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10450787842273712, + "step": 3575, + "valid_targets_mean": 3623.0, + "valid_targets_min": 2420 + }, + { + "epoch": 3.7334376630151276, + "grad_norm": 0.735023548120051, + "learning_rate": 2.1179817101678272e-05, + "loss": 0.2154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11749535799026489, + "step": 3580, + "valid_targets_mean": 3982.6, + "valid_targets_min": 1079 + }, + { + "epoch": 3.7386541471048513, + "grad_norm": 0.6294514779732339, + "learning_rate": 2.1127899222978064e-05, + "loss": 0.2115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09856463223695755, + "step": 3585, + "valid_targets_mean": 3324.5, + "valid_targets_min": 1143 + }, + { + "epoch": 3.743870631194575, + "grad_norm": 0.6522931613332661, + "learning_rate": 2.107597371835979e-05, + "loss": 0.2229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09610074758529663, + "step": 3590, + "valid_targets_mean": 2928.0, + "valid_targets_min": 1853 + }, + { + "epoch": 3.7490871152842984, + "grad_norm": 0.8690875447819433, + "learning_rate": 2.1024040938900586e-05, + "loss": 0.2461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10033658146858215, + "step": 3595, + "valid_targets_mean": 2791.0, + "valid_targets_min": 1410 + }, + { + "epoch": 3.7543035993740217, + "grad_norm": 0.6568052484525181, + "learning_rate": 2.097210123572679e-05, + "loss": 0.2212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09920533001422882, + "step": 3600, + "valid_targets_mean": 2726.1, + "valid_targets_min": 1566 + }, + { + "epoch": 3.7595200834637454, + "grad_norm": 0.657850092276732, + "learning_rate": 2.092015496001155e-05, + "loss": 0.2342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1287340521812439, + "step": 3605, + "valid_targets_mean": 4028.4, + "valid_targets_min": 2318 + }, + { + "epoch": 3.764736567553469, + "grad_norm": 0.609458873386069, + "learning_rate": 2.0868202462972458e-05, + "loss": 0.2223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1030699834227562, + "step": 3610, + "valid_targets_mean": 3282.1, + "valid_targets_min": 1509 + }, + { + "epoch": 3.7699530516431925, + "grad_norm": 0.6210939921177354, + "learning_rate": 2.0816244095869157e-05, + "loss": 0.2241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12786614894866943, + "step": 3615, + "valid_targets_mean": 4130.6, + "valid_targets_min": 2258 + }, + { + "epoch": 3.775169535732916, + "grad_norm": 0.6309877794519321, + "learning_rate": 2.0764280210001e-05, + "loss": 0.2204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.110764279961586, + "step": 3620, + "valid_targets_mean": 3620.4, + "valid_targets_min": 1281 + }, + { + "epoch": 3.7803860198226396, + "grad_norm": 0.5960229828659319, + "learning_rate": 2.0712311156704624e-05, + "loss": 0.2218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10715222358703613, + "step": 3625, + "valid_targets_mean": 3482.8, + "valid_targets_min": 1445 + }, + { + "epoch": 3.7856025039123633, + "grad_norm": 0.6155856960254252, + "learning_rate": 2.066033728735162e-05, + "loss": 0.2291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14788605272769928, + "step": 3630, + "valid_targets_mean": 4695.8, + "valid_targets_min": 3677 + }, + { + "epoch": 3.7908189880020866, + "grad_norm": 0.617645299332338, + "learning_rate": 2.0608358953346146e-05, + "loss": 0.2205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11858838051557541, + "step": 3635, + "valid_targets_mean": 4345.0, + "valid_targets_min": 2121 + }, + { + "epoch": 3.79603547209181, + "grad_norm": 0.6123558733825454, + "learning_rate": 2.0556376506122535e-05, + "loss": 0.2184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11472264677286148, + "step": 3640, + "valid_targets_mean": 3781.2, + "valid_targets_min": 1508 + }, + { + "epoch": 3.8012519561815337, + "grad_norm": 0.6778232475004164, + "learning_rate": 2.0504390297142935e-05, + "loss": 0.2392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12136973440647125, + "step": 3645, + "valid_targets_mean": 4328.5, + "valid_targets_min": 3504 + }, + { + "epoch": 3.8064684402712574, + "grad_norm": 0.586195037199297, + "learning_rate": 2.0452400677894936e-05, + "loss": 0.2343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10308879613876343, + "step": 3650, + "valid_targets_mean": 3744.1, + "valid_targets_min": 1849 + }, + { + "epoch": 3.8116849243609807, + "grad_norm": 0.7747014501750388, + "learning_rate": 2.0400407999889165e-05, + "loss": 0.2395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1523452252149582, + "step": 3655, + "valid_targets_mean": 4411.9, + "valid_targets_min": 3047 + }, + { + "epoch": 3.816901408450704, + "grad_norm": 0.631890036831002, + "learning_rate": 2.0348412614656952e-05, + "loss": 0.2342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11392106860876083, + "step": 3660, + "valid_targets_mean": 3854.1, + "valid_targets_min": 2310 + }, + { + "epoch": 3.8221178925404278, + "grad_norm": 0.6756434056050991, + "learning_rate": 2.0296414873747915e-05, + "loss": 0.2349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12308912724256516, + "step": 3665, + "valid_targets_mean": 4585.8, + "valid_targets_min": 2103 + }, + { + "epoch": 3.8273343766301515, + "grad_norm": 0.655202939575796, + "learning_rate": 2.0244415128727612e-05, + "loss": 0.2397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11594152450561523, + "step": 3670, + "valid_targets_mean": 3490.0, + "valid_targets_min": 1400 + }, + { + "epoch": 3.832550860719875, + "grad_norm": 0.6341691220552156, + "learning_rate": 2.0192413731175146e-05, + "loss": 0.2429, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11282789707183838, + "step": 3675, + "valid_targets_mean": 3865.4, + "valid_targets_min": 1433 + }, + { + "epoch": 3.837767344809598, + "grad_norm": 0.6037434144515371, + "learning_rate": 2.0140411032680775e-05, + "loss": 0.2221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1037430614233017, + "step": 3680, + "valid_targets_mean": 4004.1, + "valid_targets_min": 614 + }, + { + "epoch": 3.842983828899322, + "grad_norm": 0.6701085693537032, + "learning_rate": 2.008840738484359e-05, + "loss": 0.2355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1198904812335968, + "step": 3685, + "valid_targets_mean": 3350.6, + "valid_targets_min": 2041 + }, + { + "epoch": 3.8482003129890456, + "grad_norm": 0.6718634277813329, + "learning_rate": 2.0036403139269067e-05, + "loss": 0.2373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1104774922132492, + "step": 3690, + "valid_targets_mean": 3191.9, + "valid_targets_min": 1333 + }, + { + "epoch": 3.853416797078769, + "grad_norm": 0.6255672394263412, + "learning_rate": 1.998439864756674e-05, + "loss": 0.23, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11407849192619324, + "step": 3695, + "valid_targets_mean": 3936.2, + "valid_targets_min": 1325 + }, + { + "epoch": 3.8586332811684922, + "grad_norm": 0.5474797527969552, + "learning_rate": 1.993239426134781e-05, + "loss": 0.2275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09667281806468964, + "step": 3700, + "valid_targets_mean": 4192.9, + "valid_targets_min": 2754 + }, + { + "epoch": 3.863849765258216, + "grad_norm": 0.6359335824482076, + "learning_rate": 1.988039033222275e-05, + "loss": 0.2189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09706789255142212, + "step": 3705, + "valid_targets_mean": 3369.1, + "valid_targets_min": 1991 + }, + { + "epoch": 3.8690662493479397, + "grad_norm": 0.6270096258326283, + "learning_rate": 1.982838721179896e-05, + "loss": 0.2313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11430954933166504, + "step": 3710, + "valid_targets_mean": 3181.0, + "valid_targets_min": 1076 + }, + { + "epoch": 3.874282733437663, + "grad_norm": 0.6413656224949742, + "learning_rate": 1.9776385251678352e-05, + "loss": 0.216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09798961877822876, + "step": 3715, + "valid_targets_mean": 3769.9, + "valid_targets_min": 521 + }, + { + "epoch": 3.8794992175273864, + "grad_norm": 0.7900229781344678, + "learning_rate": 1.9724384803455013e-05, + "loss": 0.236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11032996326684952, + "step": 3720, + "valid_targets_mean": 4186.6, + "valid_targets_min": 2185 + }, + { + "epoch": 3.88471570161711, + "grad_norm": 0.6001435138684766, + "learning_rate": 1.9672386218712794e-05, + "loss": 0.2285, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08615221828222275, + "step": 3725, + "valid_targets_mean": 3472.0, + "valid_targets_min": 2024 + }, + { + "epoch": 3.889932185706834, + "grad_norm": 0.6570085208557005, + "learning_rate": 1.962038984902295e-05, + "loss": 0.2358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10530970990657806, + "step": 3730, + "valid_targets_mean": 3290.8, + "valid_targets_min": 1777 + }, + { + "epoch": 3.895148669796557, + "grad_norm": 0.9191636608831101, + "learning_rate": 1.9568396045941766e-05, + "loss": 0.2199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09803766012191772, + "step": 3735, + "valid_targets_mean": 3587.0, + "valid_targets_min": 1965 + }, + { + "epoch": 3.9003651538862805, + "grad_norm": 0.6213449204245672, + "learning_rate": 1.951640516100817e-05, + "loss": 0.237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14052420854568481, + "step": 3740, + "valid_targets_mean": 5177.9, + "valid_targets_min": 2587 + }, + { + "epoch": 3.905581637976004, + "grad_norm": 0.5961526090853403, + "learning_rate": 1.9464417545741352e-05, + "loss": 0.228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13717275857925415, + "step": 3745, + "valid_targets_mean": 5357.6, + "valid_targets_min": 2762 + }, + { + "epoch": 3.910798122065728, + "grad_norm": 0.6606352286492071, + "learning_rate": 1.941243355163841e-05, + "loss": 0.2202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10850200057029724, + "step": 3750, + "valid_targets_mean": 3201.5, + "valid_targets_min": 1035 + }, + { + "epoch": 3.9160146061554513, + "grad_norm": 0.7225624372617789, + "learning_rate": 1.936045353017195e-05, + "loss": 0.2212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13333365321159363, + "step": 3755, + "valid_targets_mean": 4252.9, + "valid_targets_min": 1876 + }, + { + "epoch": 3.9212310902451746, + "grad_norm": 0.5826363396190232, + "learning_rate": 1.9308477832787724e-05, + "loss": 0.2226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10424569249153137, + "step": 3760, + "valid_targets_mean": 3750.8, + "valid_targets_min": 2303 + }, + { + "epoch": 3.9264475743348983, + "grad_norm": 0.6320246258766229, + "learning_rate": 1.9256506810902226e-05, + "loss": 0.2198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10154462605714798, + "step": 3765, + "valid_targets_mean": 3807.4, + "valid_targets_min": 1360 + }, + { + "epoch": 3.931664058424622, + "grad_norm": 0.5732054702880631, + "learning_rate": 1.9204540815900368e-05, + "loss": 0.2163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12288829684257507, + "step": 3770, + "valid_targets_mean": 4863.2, + "valid_targets_min": 1496 + }, + { + "epoch": 3.9368805425143454, + "grad_norm": 0.6733674262553421, + "learning_rate": 1.915258019913306e-05, + "loss": 0.2172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1237935721874237, + "step": 3775, + "valid_targets_mean": 3466.5, + "valid_targets_min": 1518 + }, + { + "epoch": 3.9420970266040687, + "grad_norm": 0.5976464743405827, + "learning_rate": 1.9100625311914855e-05, + "loss": 0.2181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10284930467605591, + "step": 3780, + "valid_targets_mean": 3997.5, + "valid_targets_min": 1152 + }, + { + "epoch": 3.9473135106937924, + "grad_norm": 0.9176698609285794, + "learning_rate": 1.9048676505521552e-05, + "loss": 0.229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1120360717177391, + "step": 3785, + "valid_targets_mean": 3997.6, + "valid_targets_min": 1598 + }, + { + "epoch": 3.952529994783516, + "grad_norm": 0.6249099207631731, + "learning_rate": 1.8996734131187867e-05, + "loss": 0.2411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10151752829551697, + "step": 3790, + "valid_targets_mean": 3873.8, + "valid_targets_min": 1611 + }, + { + "epoch": 3.9577464788732395, + "grad_norm": 0.675608369729942, + "learning_rate": 1.894479854010499e-05, + "loss": 0.2184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10983134061098099, + "step": 3795, + "valid_targets_mean": 3497.1, + "valid_targets_min": 1674 + }, + { + "epoch": 3.962962962962963, + "grad_norm": 0.6449792367603867, + "learning_rate": 1.8892870083418272e-05, + "loss": 0.2195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09900842607021332, + "step": 3800, + "valid_targets_mean": 2938.8, + "valid_targets_min": 1972 + }, + { + "epoch": 3.9681794470526865, + "grad_norm": 0.6565223829592018, + "learning_rate": 1.8840949112224823e-05, + "loss": 0.2184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10003494471311569, + "step": 3805, + "valid_targets_mean": 3325.9, + "valid_targets_min": 1664 + }, + { + "epoch": 3.97339593114241, + "grad_norm": 0.587474083133146, + "learning_rate": 1.8789035977571145e-05, + "loss": 0.224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08756956458091736, + "step": 3810, + "valid_targets_mean": 4562.4, + "valid_targets_min": 2933 + }, + { + "epoch": 3.9786124152321336, + "grad_norm": 0.625609428896538, + "learning_rate": 1.873713103045076e-05, + "loss": 0.2349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12825317680835724, + "step": 3815, + "valid_targets_mean": 3738.4, + "valid_targets_min": 2418 + }, + { + "epoch": 3.983828899321857, + "grad_norm": 0.6212370604085791, + "learning_rate": 1.8685234621801803e-05, + "loss": 0.2237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12228909134864807, + "step": 3820, + "valid_targets_mean": 4120.1, + "valid_targets_min": 2100 + }, + { + "epoch": 3.9890453834115807, + "grad_norm": 0.598974509946414, + "learning_rate": 1.863334710250471e-05, + "loss": 0.2291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10375335812568665, + "step": 3825, + "valid_targets_mean": 3134.8, + "valid_targets_min": 1365 + }, + { + "epoch": 3.994261867501304, + "grad_norm": 0.6892311790240376, + "learning_rate": 1.858146882337981e-05, + "loss": 0.2274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10496819019317627, + "step": 3830, + "valid_targets_mean": 4243.1, + "valid_targets_min": 1572 + }, + { + "epoch": 3.9994783515910277, + "grad_norm": 0.6329982929825083, + "learning_rate": 1.852960013518495e-05, + "loss": 0.2292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11877349019050598, + "step": 3835, + "valid_targets_mean": 3779.2, + "valid_targets_min": 876 + }, + { + "epoch": 4.005216484089724, + "grad_norm": 0.4650879438594853, + "learning_rate": 1.8477741388613132e-05, + "loss": 0.2012, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09126079082489014, + "step": 3840, + "valid_targets_mean": 7214.4, + "valid_targets_min": 4847 + }, + { + "epoch": 4.010432968179447, + "grad_norm": 0.4276542646707754, + "learning_rate": 1.8425892934290152e-05, + "loss": 0.1639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08251366019248962, + "step": 3845, + "valid_targets_mean": 6417.1, + "valid_targets_min": 4480 + }, + { + "epoch": 4.01564945226917, + "grad_norm": 0.407307057306664, + "learning_rate": 1.8374055122772195e-05, + "loss": 0.1589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08883358538150787, + "step": 3850, + "valid_targets_mean": 7605.4, + "valid_targets_min": 5236 + }, + { + "epoch": 4.020865936358894, + "grad_norm": 0.4543123430762799, + "learning_rate": 1.8322228304543515e-05, + "loss": 0.1563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07020661234855652, + "step": 3855, + "valid_targets_mean": 5827.9, + "valid_targets_min": 4727 + }, + { + "epoch": 4.026082420448618, + "grad_norm": 0.45426109728579006, + "learning_rate": 1.827041283001403e-05, + "loss": 0.1728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0950879454612732, + "step": 3860, + "valid_targets_mean": 6448.2, + "valid_targets_min": 4211 + }, + { + "epoch": 4.031298904538341, + "grad_norm": 0.46033528286865044, + "learning_rate": 1.8218609049516943e-05, + "loss": 0.1673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09933328628540039, + "step": 3865, + "valid_targets_mean": 6108.1, + "valid_targets_min": 4279 + }, + { + "epoch": 4.0365153886280645, + "grad_norm": 0.403591545039204, + "learning_rate": 1.816681731330641e-05, + "loss": 0.1612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07301544398069382, + "step": 3870, + "valid_targets_mean": 5962.4, + "valid_targets_min": 4589 + }, + { + "epoch": 4.041731872717788, + "grad_norm": 0.5702188158389625, + "learning_rate": 1.811503797155516e-05, + "loss": 0.1766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08412884920835495, + "step": 3875, + "valid_targets_mean": 3503.2, + "valid_targets_min": 2015 + }, + { + "epoch": 4.046948356807512, + "grad_norm": 0.40429271008763223, + "learning_rate": 1.8063271374352097e-05, + "loss": 0.1667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06856978684663773, + "step": 3880, + "valid_targets_mean": 6102.5, + "valid_targets_min": 4778 + }, + { + "epoch": 4.052164840897235, + "grad_norm": 0.4100057327425552, + "learning_rate": 1.8011517871699978e-05, + "loss": 0.16, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07800818234682083, + "step": 3885, + "valid_targets_mean": 5868.2, + "valid_targets_min": 4612 + }, + { + "epoch": 4.057381324986959, + "grad_norm": 0.4635602884932141, + "learning_rate": 1.7959777813513015e-05, + "loss": 0.1719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08860057592391968, + "step": 3890, + "valid_targets_mean": 6105.8, + "valid_targets_min": 4491 + }, + { + "epoch": 4.062597809076682, + "grad_norm": 0.4287874439771452, + "learning_rate": 1.7908051549614527e-05, + "loss": 0.1602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08374327421188354, + "step": 3895, + "valid_targets_mean": 6962.0, + "valid_targets_min": 4754 + }, + { + "epoch": 4.067814293166406, + "grad_norm": 0.45701399548567434, + "learning_rate": 1.7856339429734557e-05, + "loss": 0.157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07526027411222458, + "step": 3900, + "valid_targets_mean": 6035.1, + "valid_targets_min": 4787 + }, + { + "epoch": 4.073030777256129, + "grad_norm": 0.4022157062473113, + "learning_rate": 1.780464180350753e-05, + "loss": 0.1494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07402299344539642, + "step": 3905, + "valid_targets_mean": 6028.5, + "valid_targets_min": 5213 + }, + { + "epoch": 4.078247261345853, + "grad_norm": 0.4082089815180389, + "learning_rate": 1.7752959020469868e-05, + "loss": 0.1501, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07084880769252777, + "step": 3910, + "valid_targets_mean": 6814.1, + "valid_targets_min": 4664 + }, + { + "epoch": 4.083463745435576, + "grad_norm": 0.4105877125369313, + "learning_rate": 1.7701291430057653e-05, + "loss": 0.1651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07330137491226196, + "step": 3915, + "valid_targets_mean": 6281.5, + "valid_targets_min": 4996 + }, + { + "epoch": 4.0886802295253, + "grad_norm": 0.42644975292657433, + "learning_rate": 1.7649639381604207e-05, + "loss": 0.1586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07498680055141449, + "step": 3920, + "valid_targets_mean": 6628.9, + "valid_targets_min": 4582 + }, + { + "epoch": 4.093896713615023, + "grad_norm": 0.38821075464564736, + "learning_rate": 1.759800322433781e-05, + "loss": 0.1449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06528449058532715, + "step": 3925, + "valid_targets_mean": 6536.6, + "valid_targets_min": 5205 + }, + { + "epoch": 4.099113197704747, + "grad_norm": 0.4078738762635929, + "learning_rate": 1.7546383307379284e-05, + "loss": 0.1477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07587917149066925, + "step": 3930, + "valid_targets_mean": 7031.0, + "valid_targets_min": 5098 + }, + { + "epoch": 4.1043296817944706, + "grad_norm": 0.4613143195318644, + "learning_rate": 1.7494779979739645e-05, + "loss": 0.1614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09299011528491974, + "step": 3935, + "valid_targets_mean": 6514.2, + "valid_targets_min": 5016 + }, + { + "epoch": 4.109546165884194, + "grad_norm": 0.4325389816232236, + "learning_rate": 1.7443193590317756e-05, + "loss": 0.1808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1126585304737091, + "step": 3940, + "valid_targets_mean": 6330.0, + "valid_targets_min": 965 + }, + { + "epoch": 4.114762649973917, + "grad_norm": 0.41005270076952893, + "learning_rate": 1.7391624487897947e-05, + "loss": 0.1496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07645758241415024, + "step": 3945, + "valid_targets_mean": 6293.1, + "valid_targets_min": 5218 + }, + { + "epoch": 4.119979134063641, + "grad_norm": 0.43788123816565, + "learning_rate": 1.734007302114768e-05, + "loss": 0.1585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07208527624607086, + "step": 3950, + "valid_targets_mean": 6957.4, + "valid_targets_min": 4213 + }, + { + "epoch": 4.125195618153365, + "grad_norm": 0.4836752805715503, + "learning_rate": 1.728853953861516e-05, + "loss": 0.1739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.090875543653965, + "step": 3955, + "valid_targets_mean": 5888.5, + "valid_targets_min": 4923 + }, + { + "epoch": 4.130412102243088, + "grad_norm": 0.44149143717874545, + "learning_rate": 1.723702438872701e-05, + "loss": 0.192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12212929874658585, + "step": 3960, + "valid_targets_mean": 7822.9, + "valid_targets_min": 4993 + }, + { + "epoch": 4.135628586332811, + "grad_norm": 0.42882792511551565, + "learning_rate": 1.718552791978591e-05, + "loss": 0.1849, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.086602121591568, + "step": 3965, + "valid_targets_mean": 6098.6, + "valid_targets_min": 4872 + }, + { + "epoch": 4.140845070422535, + "grad_norm": 0.4107891215772432, + "learning_rate": 1.713405047996822e-05, + "loss": 0.1619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08377519249916077, + "step": 3970, + "valid_targets_mean": 6892.2, + "valid_targets_min": 4706 + }, + { + "epoch": 4.146061554512259, + "grad_norm": 0.4076223987908001, + "learning_rate": 1.7082592417321647e-05, + "loss": 0.1634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07596518099308014, + "step": 3975, + "valid_targets_mean": 6352.0, + "valid_targets_min": 4889 + }, + { + "epoch": 4.1512780386019825, + "grad_norm": 0.4425495786634171, + "learning_rate": 1.7031154079762885e-05, + "loss": 0.1647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07672533392906189, + "step": 3980, + "valid_targets_mean": 5948.9, + "valid_targets_min": 3888 + }, + { + "epoch": 4.156494522691705, + "grad_norm": 0.4991619760445945, + "learning_rate": 1.6979735815075263e-05, + "loss": 0.1675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07523427903652191, + "step": 3985, + "valid_targets_mean": 5846.2, + "valid_targets_min": 4051 + }, + { + "epoch": 4.161711006781429, + "grad_norm": 0.3839086939622481, + "learning_rate": 1.6928337970906406e-05, + "loss": 0.1621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08400048315525055, + "step": 3990, + "valid_targets_mean": 7319.2, + "valid_targets_min": 5230 + }, + { + "epoch": 4.166927490871153, + "grad_norm": 0.4030637962532526, + "learning_rate": 1.687696089476585e-05, + "loss": 0.1723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08593684434890747, + "step": 3995, + "valid_targets_mean": 8235.1, + "valid_targets_min": 4997 + }, + { + "epoch": 4.172143974960877, + "grad_norm": 0.47021582619043967, + "learning_rate": 1.6825604934022734e-05, + "loss": 0.1664, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08492124825716019, + "step": 4000, + "valid_targets_mean": 6402.5, + "valid_targets_min": 5364 + }, + { + "epoch": 4.1773604590505995, + "grad_norm": 0.4329667393310629, + "learning_rate": 1.6774270435903423e-05, + "loss": 0.1565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09031670540571213, + "step": 4005, + "valid_targets_mean": 7117.4, + "valid_targets_min": 3391 + }, + { + "epoch": 4.182576943140323, + "grad_norm": 0.38288317755911416, + "learning_rate": 1.6722957747489172e-05, + "loss": 0.1424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07646965980529785, + "step": 4010, + "valid_targets_mean": 7542.8, + "valid_targets_min": 4829 + }, + { + "epoch": 4.187793427230047, + "grad_norm": 0.5098619897987144, + "learning_rate": 1.6671667215713784e-05, + "loss": 0.1672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07915721088647842, + "step": 4015, + "valid_targets_mean": 6284.4, + "valid_targets_min": 5180 + }, + { + "epoch": 4.193009911319771, + "grad_norm": 0.44124865084688886, + "learning_rate": 1.6620399187361226e-05, + "loss": 0.1685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08284696936607361, + "step": 4020, + "valid_targets_mean": 5806.2, + "valid_targets_min": 5101 + }, + { + "epoch": 4.198226395409494, + "grad_norm": 0.4226025353713707, + "learning_rate": 1.656915400906336e-05, + "loss": 0.1653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07433569431304932, + "step": 4025, + "valid_targets_mean": 6697.9, + "valid_targets_min": 4527 + }, + { + "epoch": 4.203442879499217, + "grad_norm": 0.4542163147134547, + "learning_rate": 1.6517932027297525e-05, + "loss": 0.1582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08851119875907898, + "step": 4030, + "valid_targets_mean": 6410.4, + "valid_targets_min": 3082 + }, + { + "epoch": 4.208659363588941, + "grad_norm": 0.4072794844509218, + "learning_rate": 1.6466733588384233e-05, + "loss": 0.1506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07540170848369598, + "step": 4035, + "valid_targets_mean": 6847.1, + "valid_targets_min": 4685 + }, + { + "epoch": 4.213875847678665, + "grad_norm": 0.4724021577577673, + "learning_rate": 1.6415559038484827e-05, + "loss": 0.1635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0830809623003006, + "step": 4040, + "valid_targets_mean": 6194.0, + "valid_targets_min": 4644 + }, + { + "epoch": 4.219092331768388, + "grad_norm": 0.416925145939862, + "learning_rate": 1.6364408723599116e-05, + "loss": 0.1666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07258963584899902, + "step": 4045, + "valid_targets_mean": 6283.8, + "valid_targets_min": 4433 + }, + { + "epoch": 4.2243088158581115, + "grad_norm": 0.4130332656592124, + "learning_rate": 1.6313282989563062e-05, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07635585963726044, + "step": 4050, + "valid_targets_mean": 5917.2, + "valid_targets_min": 4857 + }, + { + "epoch": 4.229525299947835, + "grad_norm": 0.4554379752938956, + "learning_rate": 1.626218218204643e-05, + "loss": 0.1664, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09920241683721542, + "step": 4055, + "valid_targets_mean": 6986.4, + "valid_targets_min": 5101 + }, + { + "epoch": 4.234741784037559, + "grad_norm": 0.4425648251835105, + "learning_rate": 1.6211106646550455e-05, + "loss": 0.1605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06709270924329758, + "step": 4060, + "valid_targets_mean": 5299.5, + "valid_targets_min": 4442 + }, + { + "epoch": 4.239958268127282, + "grad_norm": 0.4425113566645608, + "learning_rate": 1.616005672840551e-05, + "loss": 0.1373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.059069372713565826, + "step": 4065, + "valid_targets_mean": 6133.9, + "valid_targets_min": 5130 + }, + { + "epoch": 4.245174752217006, + "grad_norm": 0.4203990767410962, + "learning_rate": 1.6109032772768736e-05, + "loss": 0.1414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0834202766418457, + "step": 4070, + "valid_targets_mean": 6477.4, + "valid_targets_min": 4162 + }, + { + "epoch": 4.250391236306729, + "grad_norm": 0.4024186266923904, + "learning_rate": 1.6058035124621766e-05, + "loss": 0.1466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07049587368965149, + "step": 4075, + "valid_targets_mean": 6350.4, + "valid_targets_min": 3940 + }, + { + "epoch": 4.255607720396453, + "grad_norm": 0.4503920304386811, + "learning_rate": 1.6007064128768354e-05, + "loss": 0.1578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08962923288345337, + "step": 4080, + "valid_targets_mean": 6625.1, + "valid_targets_min": 4642 + }, + { + "epoch": 4.260824204486176, + "grad_norm": 0.47961381388551877, + "learning_rate": 1.5956120129832054e-05, + "loss": 0.1801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09213955700397491, + "step": 4085, + "valid_targets_mean": 4907.0, + "valid_targets_min": 4431 + }, + { + "epoch": 4.2660406885759, + "grad_norm": 0.47228681442914816, + "learning_rate": 1.5905203472253892e-05, + "loss": 0.1877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08642150461673737, + "step": 4090, + "valid_targets_mean": 6465.0, + "valid_targets_min": 5333 + }, + { + "epoch": 4.271257172665623, + "grad_norm": 0.4392698256872824, + "learning_rate": 1.5854314500290025e-05, + "loss": 0.1538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08148103952407837, + "step": 4095, + "valid_targets_mean": 5650.5, + "valid_targets_min": 4769 + }, + { + "epoch": 4.276473656755347, + "grad_norm": 0.452714470389549, + "learning_rate": 1.580345355800944e-05, + "loss": 0.1621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07886253297328949, + "step": 4100, + "valid_targets_mean": 5540.0, + "valid_targets_min": 4281 + }, + { + "epoch": 4.28169014084507, + "grad_norm": 0.4734971451149016, + "learning_rate": 1.5752620989291592e-05, + "loss": 0.1648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11383283138275146, + "step": 4105, + "valid_targets_mean": 6499.0, + "valid_targets_min": 4844 + }, + { + "epoch": 4.286906624934794, + "grad_norm": 0.4267548775423274, + "learning_rate": 1.5701817137824108e-05, + "loss": 0.1747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07103344053030014, + "step": 4110, + "valid_targets_mean": 5736.5, + "valid_targets_min": 5227 + }, + { + "epoch": 4.2921231090245175, + "grad_norm": 0.40044620838415695, + "learning_rate": 1.5651042347100452e-05, + "loss": 0.1588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.061747193336486816, + "step": 4115, + "valid_targets_mean": 5857.2, + "valid_targets_min": 2461 + }, + { + "epoch": 4.297339593114241, + "grad_norm": 0.4217187623628326, + "learning_rate": 1.5600296960417597e-05, + "loss": 0.1659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0711250901222229, + "step": 4120, + "valid_targets_mean": 5793.4, + "valid_targets_min": 2815 + }, + { + "epoch": 4.302556077203964, + "grad_norm": 0.4013718701421378, + "learning_rate": 1.5549581320873715e-05, + "loss": 0.1623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06912415474653244, + "step": 4125, + "valid_targets_mean": 6588.8, + "valid_targets_min": 4875 + }, + { + "epoch": 4.307772561293688, + "grad_norm": 0.44327196393343526, + "learning_rate": 1.549889577136586e-05, + "loss": 0.1474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0739007219672203, + "step": 4130, + "valid_targets_mean": 5984.8, + "valid_targets_min": 4901 + }, + { + "epoch": 4.312989045383412, + "grad_norm": 0.3915493000776856, + "learning_rate": 1.5448240654587622e-05, + "loss": 0.1572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06956405192613602, + "step": 4135, + "valid_targets_mean": 6067.4, + "valid_targets_min": 3786 + }, + { + "epoch": 4.318205529473135, + "grad_norm": 0.43761928313464077, + "learning_rate": 1.5397616313026853e-05, + "loss": 0.2034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07812249660491943, + "step": 4140, + "valid_targets_mean": 5852.5, + "valid_targets_min": 5057 + }, + { + "epoch": 4.323422013562858, + "grad_norm": 0.4945386024720073, + "learning_rate": 1.5347023088963315e-05, + "loss": 0.1507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08264617621898651, + "step": 4145, + "valid_targets_mean": 6362.2, + "valid_targets_min": 3952 + }, + { + "epoch": 4.328638497652582, + "grad_norm": 0.48273837665464625, + "learning_rate": 1.5296461324466384e-05, + "loss": 0.1633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07927330583333969, + "step": 4150, + "valid_targets_mean": 5360.2, + "valid_targets_min": 4267 + }, + { + "epoch": 4.333854981742306, + "grad_norm": 0.4554094665689619, + "learning_rate": 1.5245931361392727e-05, + "loss": 0.1684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09695842862129211, + "step": 4155, + "valid_targets_mean": 6842.1, + "valid_targets_min": 4785 + }, + { + "epoch": 4.3390714658320295, + "grad_norm": 0.4711406135747848, + "learning_rate": 1.5195433541384009e-05, + "loss": 0.1472, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07037786394357681, + "step": 4160, + "valid_targets_mean": 6188.5, + "valid_targets_min": 5438 + }, + { + "epoch": 4.344287949921752, + "grad_norm": 0.47775444204108175, + "learning_rate": 1.514496820586456e-05, + "loss": 0.1499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08806880563497543, + "step": 4165, + "valid_targets_mean": 5848.6, + "valid_targets_min": 4734 + }, + { + "epoch": 4.349504434011476, + "grad_norm": 0.3944260738031044, + "learning_rate": 1.5094535696039067e-05, + "loss": 0.1404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06219204515218735, + "step": 4170, + "valid_targets_mean": 6278.2, + "valid_targets_min": 5636 + }, + { + "epoch": 4.3547209181012, + "grad_norm": 0.4288857134453204, + "learning_rate": 1.50441363528903e-05, + "loss": 0.1603, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06775358319282532, + "step": 4175, + "valid_targets_mean": 6168.9, + "valid_targets_min": 4206 + }, + { + "epoch": 4.359937402190924, + "grad_norm": 0.42126784727832883, + "learning_rate": 1.4993770517176764e-05, + "loss": 0.1497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06789038330316544, + "step": 4180, + "valid_targets_mean": 6226.5, + "valid_targets_min": 4535 + }, + { + "epoch": 4.3651538862806465, + "grad_norm": 0.36270055269761187, + "learning_rate": 1.4943438529430428e-05, + "loss": 0.1525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07382744550704956, + "step": 4185, + "valid_targets_mean": 7821.6, + "valid_targets_min": 5471 + }, + { + "epoch": 4.37037037037037, + "grad_norm": 0.41724187496859044, + "learning_rate": 1.4893140729954402e-05, + "loss": 0.1605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0657457485795021, + "step": 4190, + "valid_targets_mean": 6843.0, + "valid_targets_min": 5243 + }, + { + "epoch": 4.375586854460094, + "grad_norm": 0.4397731070722032, + "learning_rate": 1.4842877458820644e-05, + "loss": 0.1423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08740366995334625, + "step": 4195, + "valid_targets_mean": 6658.6, + "valid_targets_min": 4906 + }, + { + "epoch": 4.380803338549818, + "grad_norm": 0.39972160280777896, + "learning_rate": 1.4792649055867668e-05, + "loss": 0.1617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07797618210315704, + "step": 4200, + "valid_targets_mean": 6799.5, + "valid_targets_min": 5168 + }, + { + "epoch": 4.386019822639541, + "grad_norm": 0.46620898066462546, + "learning_rate": 1.474245586069822e-05, + "loss": 0.1646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0822484940290451, + "step": 4205, + "valid_targets_mean": 5376.2, + "valid_targets_min": 3130 + }, + { + "epoch": 4.391236306729264, + "grad_norm": 0.5733151840338875, + "learning_rate": 1.4692298212677018e-05, + "loss": 0.1995, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11209208518266678, + "step": 4210, + "valid_targets_mean": 5092.5, + "valid_targets_min": 1971 + }, + { + "epoch": 4.396452790818988, + "grad_norm": 0.5191286481443436, + "learning_rate": 1.464217645092843e-05, + "loss": 0.2238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11199919879436493, + "step": 4215, + "valid_targets_mean": 5324.1, + "valid_targets_min": 1631 + }, + { + "epoch": 4.401669274908712, + "grad_norm": 0.585527141595161, + "learning_rate": 1.4592090914334206e-05, + "loss": 0.2311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1429136097431183, + "step": 4220, + "valid_targets_mean": 6070.8, + "valid_targets_min": 3294 + }, + { + "epoch": 4.406885758998435, + "grad_norm": 0.5666823280071418, + "learning_rate": 1.4542041941531136e-05, + "loss": 0.2172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09826380014419556, + "step": 4225, + "valid_targets_mean": 4407.1, + "valid_targets_min": 2736 + }, + { + "epoch": 4.4121022430881585, + "grad_norm": 0.6596216247663018, + "learning_rate": 1.4492029870908835e-05, + "loss": 0.2244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08822134882211685, + "step": 4230, + "valid_targets_mean": 3646.2, + "valid_targets_min": 980 + }, + { + "epoch": 4.417318727177882, + "grad_norm": 0.5560028906625156, + "learning_rate": 1.4442055040607402e-05, + "loss": 0.2386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1110324114561081, + "step": 4235, + "valid_targets_mean": 5067.1, + "valid_targets_min": 1512 + }, + { + "epoch": 4.422535211267606, + "grad_norm": 0.6396599717200336, + "learning_rate": 1.4392117788515138e-05, + "loss": 0.2166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1114412248134613, + "step": 4240, + "valid_targets_mean": 4779.2, + "valid_targets_min": 2405 + }, + { + "epoch": 4.427751695357329, + "grad_norm": 0.5448255008932227, + "learning_rate": 1.4342218452266284e-05, + "loss": 0.2256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08737234026193619, + "step": 4245, + "valid_targets_mean": 3286.4, + "valid_targets_min": 1654 + }, + { + "epoch": 4.432968179447053, + "grad_norm": 0.4831034643446941, + "learning_rate": 1.4292357369238726e-05, + "loss": 0.2183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10613803565502167, + "step": 4250, + "valid_targets_mean": 5448.2, + "valid_targets_min": 4644 + }, + { + "epoch": 4.438184663536776, + "grad_norm": 0.5797047456580559, + "learning_rate": 1.42425348765517e-05, + "loss": 0.2333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10123090445995331, + "step": 4255, + "valid_targets_mean": 4343.2, + "valid_targets_min": 2003 + }, + { + "epoch": 4.4434011476265, + "grad_norm": 0.48717169851191156, + "learning_rate": 1.4192751311063541e-05, + "loss": 0.2203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11523032188415527, + "step": 4260, + "valid_targets_mean": 6736.4, + "valid_targets_min": 953 + }, + { + "epoch": 4.448617631716223, + "grad_norm": 0.5738580941282053, + "learning_rate": 1.4143007009369388e-05, + "loss": 0.2214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10213956236839294, + "step": 4265, + "valid_targets_mean": 4140.9, + "valid_targets_min": 1265 + }, + { + "epoch": 4.453834115805947, + "grad_norm": 0.5587286389943354, + "learning_rate": 1.4093302307798906e-05, + "loss": 0.2234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10021477192640305, + "step": 4270, + "valid_targets_mean": 4903.1, + "valid_targets_min": 1982 + }, + { + "epoch": 4.45905059989567, + "grad_norm": 0.576436970242694, + "learning_rate": 1.4043637542414007e-05, + "loss": 0.2261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09724591672420502, + "step": 4275, + "valid_targets_mean": 4012.9, + "valid_targets_min": 1759 + }, + { + "epoch": 4.464267083985394, + "grad_norm": 0.6383501905597057, + "learning_rate": 1.399401304900661e-05, + "loss": 0.2216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10483422130346298, + "step": 4280, + "valid_targets_mean": 3281.4, + "valid_targets_min": 1451 + }, + { + "epoch": 4.469483568075117, + "grad_norm": 0.648372755532651, + "learning_rate": 1.3944429163096332e-05, + "loss": 0.219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11362691223621368, + "step": 4285, + "valid_targets_mean": 3966.2, + "valid_targets_min": 1861 + }, + { + "epoch": 4.474700052164841, + "grad_norm": 0.6549281149489672, + "learning_rate": 1.3894886219928247e-05, + "loss": 0.2132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10499267280101776, + "step": 4290, + "valid_targets_mean": 4078.9, + "valid_targets_min": 2100 + }, + { + "epoch": 4.4799165362545645, + "grad_norm": 0.5998489988113542, + "learning_rate": 1.3845384554470596e-05, + "loss": 0.2222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11616595834493637, + "step": 4295, + "valid_targets_mean": 3775.8, + "valid_targets_min": 2767 + }, + { + "epoch": 4.485133020344288, + "grad_norm": 0.6610153233361293, + "learning_rate": 1.3795924501412542e-05, + "loss": 0.231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12534578144550323, + "step": 4300, + "valid_targets_mean": 3907.5, + "valid_targets_min": 1205 + }, + { + "epoch": 4.490349504434011, + "grad_norm": 0.6162597367563946, + "learning_rate": 1.3746506395161901e-05, + "loss": 0.2161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09281608462333679, + "step": 4305, + "valid_targets_mean": 4297.4, + "valid_targets_min": 2004 + }, + { + "epoch": 4.495565988523735, + "grad_norm": 0.6033627921727935, + "learning_rate": 1.3697130569842874e-05, + "loss": 0.2246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.098073311150074, + "step": 4310, + "valid_targets_mean": 3638.5, + "valid_targets_min": 1020 + }, + { + "epoch": 4.500782472613459, + "grad_norm": 0.6475203962567131, + "learning_rate": 1.3647797359293797e-05, + "loss": 0.2092, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12549373507499695, + "step": 4315, + "valid_targets_mean": 4024.0, + "valid_targets_min": 2130 + }, + { + "epoch": 4.505998956703182, + "grad_norm": 0.6141296374343257, + "learning_rate": 1.359850709706489e-05, + "loss": 0.2073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10680737346410751, + "step": 4320, + "valid_targets_mean": 4524.0, + "valid_targets_min": 2046 + }, + { + "epoch": 4.511215440792905, + "grad_norm": 0.6713459659201054, + "learning_rate": 1.354926011641596e-05, + "loss": 0.2103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14310236275196075, + "step": 4325, + "valid_targets_mean": 4543.6, + "valid_targets_min": 3373 + }, + { + "epoch": 4.516431924882629, + "grad_norm": 0.6506527556958964, + "learning_rate": 1.3500056750314215e-05, + "loss": 0.2188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10186170041561127, + "step": 4330, + "valid_targets_mean": 3376.9, + "valid_targets_min": 1747 + }, + { + "epoch": 4.521648408972353, + "grad_norm": 0.6326328025884154, + "learning_rate": 1.345089733143196e-05, + "loss": 0.2214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10249774903059006, + "step": 4335, + "valid_targets_mean": 3227.2, + "valid_targets_min": 1570 + }, + { + "epoch": 4.5268648930620765, + "grad_norm": 0.6275717352660982, + "learning_rate": 1.3401782192144372e-05, + "loss": 0.2083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11434634029865265, + "step": 4340, + "valid_targets_mean": 4808.2, + "valid_targets_min": 1537 + }, + { + "epoch": 4.532081377151799, + "grad_norm": 0.571809461327169, + "learning_rate": 1.3352711664527242e-05, + "loss": 0.2095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09125201404094696, + "step": 4345, + "valid_targets_mean": 4261.5, + "valid_targets_min": 1934 + }, + { + "epoch": 4.537297861241523, + "grad_norm": 0.6373497557587361, + "learning_rate": 1.3303686080354739e-05, + "loss": 0.2125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0951399952173233, + "step": 4350, + "valid_targets_mean": 3327.4, + "valid_targets_min": 2562 + }, + { + "epoch": 4.542514345331247, + "grad_norm": 0.6582421226956179, + "learning_rate": 1.3254705771097154e-05, + "loss": 0.2107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11322943866252899, + "step": 4355, + "valid_targets_mean": 4351.6, + "valid_targets_min": 2659 + }, + { + "epoch": 4.547730829420971, + "grad_norm": 0.6112569594570527, + "learning_rate": 1.3205771067918675e-05, + "loss": 0.2056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09720960259437561, + "step": 4360, + "valid_targets_mean": 3976.4, + "valid_targets_min": 1349 + }, + { + "epoch": 4.5529473135106935, + "grad_norm": 0.7425887055988142, + "learning_rate": 1.3156882301675133e-05, + "loss": 0.2121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08027955144643784, + "step": 4365, + "valid_targets_mean": 2862.2, + "valid_targets_min": 1367 + }, + { + "epoch": 4.558163797600417, + "grad_norm": 0.6821598753094497, + "learning_rate": 1.3108039802911776e-05, + "loss": 0.2161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10562984645366669, + "step": 4370, + "valid_targets_mean": 3540.0, + "valid_targets_min": 2026 + }, + { + "epoch": 4.563380281690141, + "grad_norm": 0.6429406748174424, + "learning_rate": 1.3059243901861024e-05, + "loss": 0.2012, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08766038715839386, + "step": 4375, + "valid_targets_mean": 3112.6, + "valid_targets_min": 2033 + }, + { + "epoch": 4.568596765779865, + "grad_norm": 0.7656931734996429, + "learning_rate": 1.3010494928440248e-05, + "loss": 0.2191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1176757737994194, + "step": 4380, + "valid_targets_mean": 2677.6, + "valid_targets_min": 1285 + }, + { + "epoch": 4.573813249869588, + "grad_norm": 0.8042533572060415, + "learning_rate": 1.2961793212249527e-05, + "loss": 0.2131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12352727353572845, + "step": 4385, + "valid_targets_mean": 2929.1, + "valid_targets_min": 1821 + }, + { + "epoch": 4.579029733959311, + "grad_norm": 0.7796373921597968, + "learning_rate": 1.2913139082569434e-05, + "loss": 0.2102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10791006684303284, + "step": 4390, + "valid_targets_mean": 3896.9, + "valid_targets_min": 2535 + }, + { + "epoch": 4.584246218049035, + "grad_norm": 0.7082258655459408, + "learning_rate": 1.2864532868358799e-05, + "loss": 0.209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11081019043922424, + "step": 4395, + "valid_targets_mean": 3107.6, + "valid_targets_min": 2080 + }, + { + "epoch": 4.589462702138759, + "grad_norm": 0.7018007832412342, + "learning_rate": 1.2815974898252482e-05, + "loss": 0.2151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1026323139667511, + "step": 4400, + "valid_targets_mean": 2895.4, + "valid_targets_min": 1153 + }, + { + "epoch": 4.594679186228482, + "grad_norm": 0.6273822057525007, + "learning_rate": 1.2767465500559162e-05, + "loss": 0.2081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10037755966186523, + "step": 4405, + "valid_targets_mean": 3788.0, + "valid_targets_min": 1858 + }, + { + "epoch": 4.5998956703182055, + "grad_norm": 0.6572616409380438, + "learning_rate": 1.271900500325911e-05, + "loss": 0.2161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09604296833276749, + "step": 4410, + "valid_targets_mean": 3456.4, + "valid_targets_min": 1639 + }, + { + "epoch": 4.605112154407929, + "grad_norm": 0.6669218001359545, + "learning_rate": 1.2670593734001972e-05, + "loss": 0.2121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07522930204868317, + "step": 4415, + "valid_targets_mean": 2912.5, + "valid_targets_min": 1394 + }, + { + "epoch": 4.610328638497653, + "grad_norm": 0.6781043997211057, + "learning_rate": 1.2622232020104568e-05, + "loss": 0.2145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1100131943821907, + "step": 4420, + "valid_targets_mean": 4036.0, + "valid_targets_min": 1844 + }, + { + "epoch": 4.615545122587376, + "grad_norm": 0.6581656253658736, + "learning_rate": 1.2573920188548634e-05, + "loss": 0.2111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12665978074073792, + "step": 4425, + "valid_targets_mean": 4271.6, + "valid_targets_min": 1734 + }, + { + "epoch": 4.6207616066771, + "grad_norm": 0.6558280449068744, + "learning_rate": 1.2525658565978668e-05, + "loss": 0.2086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11578554660081863, + "step": 4430, + "valid_targets_mean": 4111.2, + "valid_targets_min": 2077 + }, + { + "epoch": 4.625978090766823, + "grad_norm": 0.6546624881929699, + "learning_rate": 1.2477447478699696e-05, + "loss": 0.209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1152484118938446, + "step": 4435, + "valid_targets_mean": 4209.2, + "valid_targets_min": 2349 + }, + { + "epoch": 4.631194574856547, + "grad_norm": 0.6645272050341419, + "learning_rate": 1.2429287252675062e-05, + "loss": 0.2117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11969613283872604, + "step": 4440, + "valid_targets_mean": 4188.0, + "valid_targets_min": 1607 + }, + { + "epoch": 4.63641105894627, + "grad_norm": 0.7241666438179849, + "learning_rate": 1.2381178213524223e-05, + "loss": 0.2136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10624804347753525, + "step": 4445, + "valid_targets_mean": 3289.2, + "valid_targets_min": 1507 + }, + { + "epoch": 4.641627543035994, + "grad_norm": 0.6361462623489207, + "learning_rate": 1.2333120686520568e-05, + "loss": 0.2134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10122013092041016, + "step": 4450, + "valid_targets_mean": 3658.6, + "valid_targets_min": 1902 + }, + { + "epoch": 4.646844027125717, + "grad_norm": 0.6382760886743402, + "learning_rate": 1.2285114996589181e-05, + "loss": 0.209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09632793068885803, + "step": 4455, + "valid_targets_mean": 3230.1, + "valid_targets_min": 654 + }, + { + "epoch": 4.652060511215441, + "grad_norm": 0.6411521015193199, + "learning_rate": 1.2237161468304681e-05, + "loss": 0.2131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10382625460624695, + "step": 4460, + "valid_targets_mean": 3870.0, + "valid_targets_min": 2607 + }, + { + "epoch": 4.657276995305164, + "grad_norm": 0.6184593517246503, + "learning_rate": 1.218926042588902e-05, + "loss": 0.2118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11404374241828918, + "step": 4465, + "valid_targets_mean": 4594.5, + "valid_targets_min": 1739 + }, + { + "epoch": 4.662493479394888, + "grad_norm": 0.6312782886732488, + "learning_rate": 1.2141412193209274e-05, + "loss": 0.2095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09956435114145279, + "step": 4470, + "valid_targets_mean": 3615.8, + "valid_targets_min": 1140 + }, + { + "epoch": 4.6677099634846115, + "grad_norm": 0.6334216051594609, + "learning_rate": 1.2093617093775458e-05, + "loss": 0.1992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1022881492972374, + "step": 4475, + "valid_targets_mean": 3535.9, + "valid_targets_min": 2501 + }, + { + "epoch": 4.672926447574335, + "grad_norm": 0.6768857527355584, + "learning_rate": 1.2045875450738352e-05, + "loss": 0.2089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10517385601997375, + "step": 4480, + "valid_targets_mean": 4050.4, + "valid_targets_min": 1298 + }, + { + "epoch": 4.678142931664058, + "grad_norm": 0.5898476228165662, + "learning_rate": 1.199818758688731e-05, + "loss": 0.1894, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10735522210597992, + "step": 4485, + "valid_targets_mean": 4874.1, + "valid_targets_min": 3357 + }, + { + "epoch": 4.683359415753782, + "grad_norm": 0.6498217243810555, + "learning_rate": 1.1950553824648077e-05, + "loss": 0.2235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10738418996334076, + "step": 4490, + "valid_targets_mean": 3619.9, + "valid_targets_min": 1726 + }, + { + "epoch": 4.688575899843506, + "grad_norm": 0.674561582367286, + "learning_rate": 1.1902974486080599e-05, + "loss": 0.2086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08980332314968109, + "step": 4495, + "valid_targets_mean": 3198.0, + "valid_targets_min": 1743 + }, + { + "epoch": 4.6937923839332285, + "grad_norm": 0.6193054270954791, + "learning_rate": 1.1855449892876858e-05, + "loss": 0.2108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10802572965621948, + "step": 4500, + "valid_targets_mean": 3788.8, + "valid_targets_min": 1234 + }, + { + "epoch": 4.699008868022952, + "grad_norm": 0.6331231066071324, + "learning_rate": 1.1807980366358699e-05, + "loss": 0.2147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11651439964771271, + "step": 4505, + "valid_targets_mean": 4123.4, + "valid_targets_min": 2303 + }, + { + "epoch": 4.704225352112676, + "grad_norm": 0.6277837491738062, + "learning_rate": 1.1760566227475642e-05, + "loss": 0.2137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10604351758956909, + "step": 4510, + "valid_targets_mean": 4112.8, + "valid_targets_min": 1651 + }, + { + "epoch": 4.7094418362024, + "grad_norm": 0.6952079093415012, + "learning_rate": 1.1713207796802739e-05, + "loss": 0.2104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10713706910610199, + "step": 4515, + "valid_targets_mean": 2986.9, + "valid_targets_min": 1374 + }, + { + "epoch": 4.7146583202921235, + "grad_norm": 0.6791142028141507, + "learning_rate": 1.166590539453837e-05, + "loss": 0.2017, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11282327771186829, + "step": 4520, + "valid_targets_mean": 4170.5, + "valid_targets_min": 1625 + }, + { + "epoch": 4.719874804381846, + "grad_norm": 0.6088452479362633, + "learning_rate": 1.1618659340502104e-05, + "loss": 0.2008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10967598855495453, + "step": 4525, + "valid_targets_mean": 4429.6, + "valid_targets_min": 1966 + }, + { + "epoch": 4.72509128847157, + "grad_norm": 0.6569963280216358, + "learning_rate": 1.157146995413252e-05, + "loss": 0.1985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10143765807151794, + "step": 4530, + "valid_targets_mean": 3639.9, + "valid_targets_min": 1561 + }, + { + "epoch": 4.730307772561294, + "grad_norm": 0.6431782743630451, + "learning_rate": 1.152433755448509e-05, + "loss": 0.196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10531148314476013, + "step": 4535, + "valid_targets_mean": 3708.9, + "valid_targets_min": 1444 + }, + { + "epoch": 4.735524256651017, + "grad_norm": 0.665693200948245, + "learning_rate": 1.1477262460229945e-05, + "loss": 0.1914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07844926416873932, + "step": 4540, + "valid_targets_mean": 2605.8, + "valid_targets_min": 1504 + }, + { + "epoch": 4.7407407407407405, + "grad_norm": 0.6369076043126971, + "learning_rate": 1.143024498964981e-05, + "loss": 0.1895, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0930037871003151, + "step": 4545, + "valid_targets_mean": 3523.2, + "valid_targets_min": 858 + }, + { + "epoch": 4.745957224830464, + "grad_norm": 0.719054047998916, + "learning_rate": 1.1383285460637766e-05, + "loss": 0.2056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11604855209589005, + "step": 4550, + "valid_targets_mean": 3901.8, + "valid_targets_min": 1288 + }, + { + "epoch": 4.751173708920188, + "grad_norm": 0.6397252676608315, + "learning_rate": 1.1336384190695172e-05, + "loss": 0.2222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09515132009983063, + "step": 4555, + "valid_targets_mean": 3838.9, + "valid_targets_min": 1537 + }, + { + "epoch": 4.756390193009912, + "grad_norm": 0.6527658371129172, + "learning_rate": 1.1289541496929466e-05, + "loss": 0.1971, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10952002555131912, + "step": 4560, + "valid_targets_mean": 3873.1, + "valid_targets_min": 2972 + }, + { + "epoch": 4.761606677099635, + "grad_norm": 0.842112795685009, + "learning_rate": 1.1242757696052044e-05, + "loss": 0.2162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14111527800559998, + "step": 4565, + "valid_targets_mean": 4917.1, + "valid_targets_min": 2013 + }, + { + "epoch": 4.766823161189358, + "grad_norm": 0.6966910734316851, + "learning_rate": 1.1196033104376141e-05, + "loss": 0.2005, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10164788365364075, + "step": 4570, + "valid_targets_mean": 3359.1, + "valid_targets_min": 1744 + }, + { + "epoch": 4.772039645279082, + "grad_norm": 0.6108802030015134, + "learning_rate": 1.1149368037814644e-05, + "loss": 0.207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08836719393730164, + "step": 4575, + "valid_targets_mean": 3980.5, + "valid_targets_min": 1914 + }, + { + "epoch": 4.777256129368805, + "grad_norm": 0.7082996945794314, + "learning_rate": 1.1102762811877974e-05, + "loss": 0.2005, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08034937083721161, + "step": 4580, + "valid_targets_mean": 3546.5, + "valid_targets_min": 2414 + }, + { + "epoch": 4.782472613458529, + "grad_norm": 0.6007709985108861, + "learning_rate": 1.1056217741672e-05, + "loss": 0.195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11096403002738953, + "step": 4585, + "valid_targets_mean": 4299.2, + "valid_targets_min": 1937 + }, + { + "epoch": 4.7876890975482524, + "grad_norm": 0.6887640419278492, + "learning_rate": 1.1009733141895823e-05, + "loss": 0.2089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10638195276260376, + "step": 4590, + "valid_targets_mean": 3179.6, + "valid_targets_min": 1649 + }, + { + "epoch": 4.792905581637976, + "grad_norm": 0.6704722529758658, + "learning_rate": 1.0963309326839708e-05, + "loss": 0.199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08831895887851715, + "step": 4595, + "valid_targets_mean": 2905.0, + "valid_targets_min": 1463 + }, + { + "epoch": 4.7981220657277, + "grad_norm": 0.7474741839463412, + "learning_rate": 1.0916946610382966e-05, + "loss": 0.2058, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09463998675346375, + "step": 4600, + "valid_targets_mean": 3069.8, + "valid_targets_min": 1426 + }, + { + "epoch": 4.803338549817423, + "grad_norm": 0.6954275208866936, + "learning_rate": 1.0870645305991772e-05, + "loss": 0.2151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08979621529579163, + "step": 4605, + "valid_targets_mean": 3046.1, + "valid_targets_min": 1661 + }, + { + "epoch": 4.808555033907147, + "grad_norm": 0.5978418590274945, + "learning_rate": 1.0824405726717119e-05, + "loss": 0.2086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11404362320899963, + "step": 4610, + "valid_targets_mean": 4702.8, + "valid_targets_min": 1646 + }, + { + "epoch": 4.81377151799687, + "grad_norm": 0.7157810015746385, + "learning_rate": 1.0778228185192639e-05, + "loss": 0.2224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1139599159359932, + "step": 4615, + "valid_targets_mean": 3419.0, + "valid_targets_min": 1450 + }, + { + "epoch": 4.818988002086593, + "grad_norm": 0.7110309930394402, + "learning_rate": 1.0732112993632539e-05, + "loss": 0.2045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09629866480827332, + "step": 4620, + "valid_targets_mean": 3187.5, + "valid_targets_min": 1625 + }, + { + "epoch": 4.824204486176317, + "grad_norm": 0.6349935436331033, + "learning_rate": 1.0686060463829451e-05, + "loss": 0.2193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0836082249879837, + "step": 4625, + "valid_targets_mean": 3247.6, + "valid_targets_min": 1174 + }, + { + "epoch": 4.829420970266041, + "grad_norm": 0.6626858086576323, + "learning_rate": 1.0640070907152342e-05, + "loss": 0.2133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12210780382156372, + "step": 4630, + "valid_targets_mean": 4418.8, + "valid_targets_min": 2949 + }, + { + "epoch": 4.834637454355764, + "grad_norm": 0.6083124992397748, + "learning_rate": 1.0594144634544405e-05, + "loss": 0.2127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12354354560375214, + "step": 4635, + "valid_targets_mean": 5217.8, + "valid_targets_min": 2652 + }, + { + "epoch": 4.839853938445488, + "grad_norm": 0.6667279180753407, + "learning_rate": 1.0548281956520978e-05, + "loss": 0.2101, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09638375043869019, + "step": 4640, + "valid_targets_mean": 3643.0, + "valid_targets_min": 1381 + }, + { + "epoch": 4.845070422535211, + "grad_norm": 0.666004660485656, + "learning_rate": 1.0502483183167395e-05, + "loss": 0.2138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10104548931121826, + "step": 4645, + "valid_targets_mean": 4266.9, + "valid_targets_min": 637 + }, + { + "epoch": 4.850286906624935, + "grad_norm": 0.6446888009563501, + "learning_rate": 1.0456748624136951e-05, + "loss": 0.2074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08700317144393921, + "step": 4650, + "valid_targets_mean": 3250.2, + "valid_targets_min": 719 + }, + { + "epoch": 4.8555033907146585, + "grad_norm": 0.617336395731869, + "learning_rate": 1.0411078588648756e-05, + "loss": 0.2128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08491119742393494, + "step": 4655, + "valid_targets_mean": 4282.4, + "valid_targets_min": 3210 + }, + { + "epoch": 4.860719874804381, + "grad_norm": 0.6796065544530192, + "learning_rate": 1.0365473385485662e-05, + "loss": 0.2018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10865994542837143, + "step": 4660, + "valid_targets_mean": 4079.6, + "valid_targets_min": 1372 + }, + { + "epoch": 4.865936358894105, + "grad_norm": 0.7370308887896336, + "learning_rate": 1.0319933322992206e-05, + "loss": 0.1991, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12062433362007141, + "step": 4665, + "valid_targets_mean": 3904.6, + "valid_targets_min": 2652 + }, + { + "epoch": 4.871152842983829, + "grad_norm": 0.6441184803288257, + "learning_rate": 1.0274458709072459e-05, + "loss": 0.2051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0874616950750351, + "step": 4670, + "valid_targets_mean": 2972.8, + "valid_targets_min": 1576 + }, + { + "epoch": 4.876369327073553, + "grad_norm": 0.6369864013448258, + "learning_rate": 1.022904985118803e-05, + "loss": 0.2026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11121967434883118, + "step": 4675, + "valid_targets_mean": 4349.8, + "valid_targets_min": 1841 + }, + { + "epoch": 4.881585811163276, + "grad_norm": 0.6814158954758651, + "learning_rate": 1.0183707056355883e-05, + "loss": 0.2161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10241025686264038, + "step": 4680, + "valid_targets_mean": 3644.6, + "valid_targets_min": 1728 + }, + { + "epoch": 4.886802295252999, + "grad_norm": 0.7607554739390735, + "learning_rate": 1.0138430631146372e-05, + "loss": 0.2075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11465679854154587, + "step": 4685, + "valid_targets_mean": 3767.8, + "valid_targets_min": 1362 + }, + { + "epoch": 4.892018779342723, + "grad_norm": 0.6099260791746995, + "learning_rate": 1.009322088168108e-05, + "loss": 0.2119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10430841147899628, + "step": 4690, + "valid_targets_mean": 3784.6, + "valid_targets_min": 852 + }, + { + "epoch": 4.897235263432447, + "grad_norm": 0.710598441823825, + "learning_rate": 1.0048078113630806e-05, + "loss": 0.209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09530268609523773, + "step": 4695, + "valid_targets_mean": 2665.8, + "valid_targets_min": 1195 + }, + { + "epoch": 4.90245174752217, + "grad_norm": 0.6600044000063401, + "learning_rate": 1.0003002632213455e-05, + "loss": 0.2117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12671677768230438, + "step": 4700, + "valid_targets_mean": 5114.6, + "valid_targets_min": 2264 + }, + { + "epoch": 4.907668231611893, + "grad_norm": 0.6575430138488817, + "learning_rate": 9.95799474219202e-06, + "loss": 0.2038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08720409870147705, + "step": 4705, + "valid_targets_mean": 3751.8, + "valid_targets_min": 1975 + }, + { + "epoch": 4.912884715701617, + "grad_norm": 0.748803937393059, + "learning_rate": 9.913054747872473e-06, + "loss": 0.198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12070177495479584, + "step": 4710, + "valid_targets_mean": 4188.6, + "valid_targets_min": 2426 + }, + { + "epoch": 4.918101199791341, + "grad_norm": 0.5803462810049237, + "learning_rate": 9.868182953101754e-06, + "loss": 0.2042, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10536220669746399, + "step": 4715, + "valid_targets_mean": 6574.2, + "valid_targets_min": 2595 + }, + { + "epoch": 4.923317683881065, + "grad_norm": 0.6353067917363707, + "learning_rate": 9.823379661265677e-06, + "loss": 0.204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10645771771669388, + "step": 4720, + "valid_targets_mean": 3589.6, + "valid_targets_min": 2826 + }, + { + "epoch": 4.9285341679707875, + "grad_norm": 0.6610624924389779, + "learning_rate": 9.778645175286904e-06, + "loss": 0.197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08612782508134842, + "step": 4725, + "valid_targets_mean": 3107.8, + "valid_targets_min": 993 + }, + { + "epoch": 4.933750652060511, + "grad_norm": 0.6277025326743476, + "learning_rate": 9.733979797622874e-06, + "loss": 0.1966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09507657587528229, + "step": 4730, + "valid_targets_mean": 3915.2, + "valid_targets_min": 1575 + }, + { + "epoch": 4.938967136150235, + "grad_norm": 0.6653166084597842, + "learning_rate": 9.689383830263808e-06, + "loss": 0.2001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11884702742099762, + "step": 4735, + "valid_targets_mean": 3818.4, + "valid_targets_min": 1360 + }, + { + "epoch": 4.944183620239958, + "grad_norm": 0.635210359795573, + "learning_rate": 9.6448575747306e-06, + "loss": 0.1967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0893256664276123, + "step": 4740, + "valid_targets_mean": 3653.1, + "valid_targets_min": 2414 + }, + { + "epoch": 4.949400104329682, + "grad_norm": 0.6445728023042442, + "learning_rate": 9.60040133207281e-06, + "loss": 0.2189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11989717930555344, + "step": 4745, + "valid_targets_mean": 4546.2, + "valid_targets_min": 2717 + }, + { + "epoch": 4.954616588419405, + "grad_norm": 0.781249400207297, + "learning_rate": 9.556015402866661e-06, + "loss": 0.2066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10381810367107391, + "step": 4750, + "valid_targets_mean": 3169.8, + "valid_targets_min": 1720 + }, + { + "epoch": 4.959833072509129, + "grad_norm": 0.7205709928372828, + "learning_rate": 9.511700087212934e-06, + "loss": 0.203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10782009363174438, + "step": 4755, + "valid_targets_mean": 3068.8, + "valid_targets_min": 1278 + }, + { + "epoch": 4.965049556598853, + "grad_norm": 0.7155942114423168, + "learning_rate": 9.467455684735015e-06, + "loss": 0.1924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09531857073307037, + "step": 4760, + "valid_targets_mean": 2745.8, + "valid_targets_min": 1858 + }, + { + "epoch": 4.970266040688576, + "grad_norm": 0.7631999253501003, + "learning_rate": 9.423282494576804e-06, + "loss": 0.2087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09901231527328491, + "step": 4765, + "valid_targets_mean": 3250.5, + "valid_targets_min": 1254 + }, + { + "epoch": 4.975482524778299, + "grad_norm": 0.7235181535119036, + "learning_rate": 9.379180815400753e-06, + "loss": 0.206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11523483693599701, + "step": 4770, + "valid_targets_mean": 2633.0, + "valid_targets_min": 1039 + }, + { + "epoch": 4.980699008868023, + "grad_norm": 0.6414572658267368, + "learning_rate": 9.33515094538579e-06, + "loss": 0.2077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11133639514446259, + "step": 4775, + "valid_targets_mean": 4942.8, + "valid_targets_min": 2734 + }, + { + "epoch": 4.985915492957746, + "grad_norm": 0.6163999496508673, + "learning_rate": 9.291193182225341e-06, + "loss": 0.2053, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10578399151563644, + "step": 4780, + "valid_targets_mean": 3410.4, + "valid_targets_min": 809 + }, + { + "epoch": 4.99113197704747, + "grad_norm": 0.6535987304414527, + "learning_rate": 9.247307823125296e-06, + "loss": 0.2073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08827577531337738, + "step": 4785, + "valid_targets_mean": 3692.1, + "valid_targets_min": 1974 + }, + { + "epoch": 4.9963484611371936, + "grad_norm": 0.6489432876836929, + "learning_rate": 9.203495164802027e-06, + "loss": 0.2103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10980746150016785, + "step": 4790, + "valid_targets_mean": 4170.4, + "valid_targets_min": 2179 + }, + { + "epoch": 5.001043296817945, + "grad_norm": 0.5438954577063421, + "learning_rate": 9.159755503480345e-06, + "loss": 0.1863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08621600270271301, + "step": 4795, + "valid_targets_mean": 6935.0, + "valid_targets_min": 5295 + }, + { + "epoch": 5.006259780907668, + "grad_norm": 0.5286711841477857, + "learning_rate": 9.116089134891532e-06, + "loss": 0.1582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09281063824892044, + "step": 4800, + "valid_targets_mean": 7672.9, + "valid_targets_min": 5066 + }, + { + "epoch": 5.011476264997392, + "grad_norm": 0.44697585021380143, + "learning_rate": 9.072496354271307e-06, + "loss": 0.1547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07966761291027069, + "step": 4805, + "valid_targets_mean": 7347.6, + "valid_targets_min": 4923 + }, + { + "epoch": 5.016692749087115, + "grad_norm": 0.44186171930392454, + "learning_rate": 9.028977456357872e-06, + "loss": 0.1492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07205385714769363, + "step": 4810, + "valid_targets_mean": 5446.8, + "valid_targets_min": 3922 + }, + { + "epoch": 5.021909233176839, + "grad_norm": 0.41577792544011244, + "learning_rate": 8.985532735389873e-06, + "loss": 0.1489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07472500950098038, + "step": 4815, + "valid_targets_mean": 6924.9, + "valid_targets_min": 4797 + }, + { + "epoch": 5.027125717266562, + "grad_norm": 0.4478606717081893, + "learning_rate": 8.942162485104436e-06, + "loss": 0.1646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08218589425086975, + "step": 4820, + "valid_targets_mean": 6443.5, + "valid_targets_min": 3853 + }, + { + "epoch": 5.032342201356286, + "grad_norm": 0.4652561128051884, + "learning_rate": 8.898866998735195e-06, + "loss": 0.1585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07762619853019714, + "step": 4825, + "valid_targets_mean": 5446.5, + "valid_targets_min": 4487 + }, + { + "epoch": 5.037558685446009, + "grad_norm": 0.452496772699848, + "learning_rate": 8.85564656901028e-06, + "loss": 0.1483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07401534914970398, + "step": 4830, + "valid_targets_mean": 6210.2, + "valid_targets_min": 4137 + }, + { + "epoch": 5.042775169535733, + "grad_norm": 0.5470681219613842, + "learning_rate": 8.81250148815035e-06, + "loss": 0.1773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12600824236869812, + "step": 4835, + "valid_targets_mean": 5404.4, + "valid_targets_min": 698 + }, + { + "epoch": 5.0479916536254565, + "grad_norm": 0.43364076459056766, + "learning_rate": 8.769432047866608e-06, + "loss": 0.1491, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08941107243299484, + "step": 4840, + "valid_targets_mean": 7197.6, + "valid_targets_min": 4499 + }, + { + "epoch": 5.05320813771518, + "grad_norm": 0.4772499117859291, + "learning_rate": 8.72643853935887e-06, + "loss": 0.1599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11071843653917313, + "step": 4845, + "valid_targets_mean": 5538.4, + "valid_targets_min": 4291 + }, + { + "epoch": 5.058424621804903, + "grad_norm": 0.43229460514478857, + "learning_rate": 8.683521253313527e-06, + "loss": 0.1473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06051997095346451, + "step": 4850, + "valid_targets_mean": 5531.4, + "valid_targets_min": 4050 + }, + { + "epoch": 5.063641105894627, + "grad_norm": 0.4281359213050849, + "learning_rate": 8.640680479901648e-06, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08728465437889099, + "step": 4855, + "valid_targets_mean": 7526.6, + "valid_targets_min": 5196 + }, + { + "epoch": 5.068857589984351, + "grad_norm": 0.4276357754385637, + "learning_rate": 8.597916508776958e-06, + "loss": 0.1433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07181344926357269, + "step": 4860, + "valid_targets_mean": 6248.9, + "valid_targets_min": 4749 + }, + { + "epoch": 5.074074074074074, + "grad_norm": 0.46190388417607847, + "learning_rate": 8.55522962907394e-06, + "loss": 0.1428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.039490584284067154, + "step": 4865, + "valid_targets_mean": 3248.5, + "valid_targets_min": 1908 + }, + { + "epoch": 5.079290558163797, + "grad_norm": 0.43027869204527813, + "learning_rate": 8.512620129405816e-06, + "loss": 0.1412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07119221985340118, + "step": 4870, + "valid_targets_mean": 6225.6, + "valid_targets_min": 4808 + }, + { + "epoch": 5.084507042253521, + "grad_norm": 0.4322475384459939, + "learning_rate": 8.470088297862669e-06, + "loss": 0.1534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07681924104690552, + "step": 4875, + "valid_targets_mean": 5383.0, + "valid_targets_min": 3774 + }, + { + "epoch": 5.089723526343245, + "grad_norm": 0.43783377171224774, + "learning_rate": 8.427634422009399e-06, + "loss": 0.1483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07435213774442673, + "step": 4880, + "valid_targets_mean": 6291.4, + "valid_targets_min": 5335 + }, + { + "epoch": 5.0949400104329685, + "grad_norm": 0.4062255474996269, + "learning_rate": 8.385258788883889e-06, + "loss": 0.1333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07461273670196533, + "step": 4885, + "valid_targets_mean": 7749.1, + "valid_targets_min": 6205 + }, + { + "epoch": 5.100156494522691, + "grad_norm": 0.6566129450114484, + "learning_rate": 8.342961684994975e-06, + "loss": 0.142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07283207029104233, + "step": 4890, + "valid_targets_mean": 6882.1, + "valid_targets_min": 4428 + }, + { + "epoch": 5.105372978612415, + "grad_norm": 0.4601148693332064, + "learning_rate": 8.300743396320566e-06, + "loss": 0.1599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08039283007383347, + "step": 4895, + "valid_targets_mean": 5751.8, + "valid_targets_min": 4458 + }, + { + "epoch": 5.110589462702139, + "grad_norm": 0.4025078871548283, + "learning_rate": 8.25860420830567e-06, + "loss": 0.1624, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06683515757322311, + "step": 4900, + "valid_targets_mean": 6003.9, + "valid_targets_min": 4458 + }, + { + "epoch": 5.115805946791863, + "grad_norm": 0.4566169429403071, + "learning_rate": 8.216544405860482e-06, + "loss": 0.144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06709788739681244, + "step": 4905, + "valid_targets_mean": 6033.8, + "valid_targets_min": 4066 + }, + { + "epoch": 5.1210224308815855, + "grad_norm": 0.4576662410440266, + "learning_rate": 8.17456427335848e-06, + "loss": 0.1516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08414022624492645, + "step": 4910, + "valid_targets_mean": 6601.0, + "valid_targets_min": 4708 + }, + { + "epoch": 5.126238914971309, + "grad_norm": 0.5615271422896749, + "learning_rate": 8.132664094634452e-06, + "loss": 0.1646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0873684287071228, + "step": 4915, + "valid_targets_mean": 6092.1, + "valid_targets_min": 4789 + }, + { + "epoch": 5.131455399061033, + "grad_norm": 0.44409464100157714, + "learning_rate": 8.090844152982628e-06, + "loss": 0.1837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08679986745119095, + "step": 4920, + "valid_targets_mean": 6474.6, + "valid_targets_min": 4224 + }, + { + "epoch": 5.136671883150757, + "grad_norm": 0.42454711312507465, + "learning_rate": 8.049104731154722e-06, + "loss": 0.1716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08170602470636368, + "step": 4925, + "valid_targets_mean": 6312.4, + "valid_targets_min": 4617 + }, + { + "epoch": 5.14188836724048, + "grad_norm": 0.4492139911991501, + "learning_rate": 8.007446111358066e-06, + "loss": 0.1521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07869896292686462, + "step": 4930, + "valid_targets_mean": 6333.8, + "valid_targets_min": 4783 + }, + { + "epoch": 5.147104851330203, + "grad_norm": 0.41141793509013574, + "learning_rate": 7.965868575253632e-06, + "loss": 0.1524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07833854854106903, + "step": 4935, + "valid_targets_mean": 7380.1, + "valid_targets_min": 6020 + }, + { + "epoch": 5.152321335419927, + "grad_norm": 0.5156134915986333, + "learning_rate": 7.92437240395422e-06, + "loss": 0.1591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08218593895435333, + "step": 4940, + "valid_targets_mean": 6009.0, + "valid_targets_min": 3049 + }, + { + "epoch": 5.157537819509651, + "grad_norm": 0.4347547672422289, + "learning_rate": 7.882957878022472e-06, + "loss": 0.1543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07091741263866425, + "step": 4945, + "valid_targets_mean": 5973.1, + "valid_targets_min": 4822 + }, + { + "epoch": 5.162754303599374, + "grad_norm": 0.47543636632583747, + "learning_rate": 7.841625277469043e-06, + "loss": 0.1557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07464339584112167, + "step": 4950, + "valid_targets_mean": 5754.9, + "valid_targets_min": 2910 + }, + { + "epoch": 5.1679707876890975, + "grad_norm": 0.5214860948373861, + "learning_rate": 7.800374881750644e-06, + "loss": 0.1627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08984633535146713, + "step": 4955, + "valid_targets_mean": 6414.6, + "valid_targets_min": 4791 + }, + { + "epoch": 5.173187271778821, + "grad_norm": 0.47434963442982875, + "learning_rate": 7.759206969768216e-06, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0798363983631134, + "step": 4960, + "valid_targets_mean": 5675.9, + "valid_targets_min": 3983 + }, + { + "epoch": 5.178403755868545, + "grad_norm": 0.39121988903166655, + "learning_rate": 7.718121819864983e-06, + "loss": 0.1412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06391432881355286, + "step": 4965, + "valid_targets_mean": 6975.5, + "valid_targets_min": 4383 + }, + { + "epoch": 5.183620239958268, + "grad_norm": 0.4187000333349903, + "learning_rate": 7.677119709824635e-06, + "loss": 0.1362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07538995146751404, + "step": 4970, + "valid_targets_mean": 6417.1, + "valid_targets_min": 3182 + }, + { + "epoch": 5.188836724047992, + "grad_norm": 0.4861441970802587, + "learning_rate": 7.636200916869387e-06, + "loss": 0.1627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08574381470680237, + "step": 4975, + "valid_targets_mean": 6218.6, + "valid_targets_min": 5077 + }, + { + "epoch": 5.194053208137715, + "grad_norm": 0.49267386412094155, + "learning_rate": 7.595365717658143e-06, + "loss": 0.1582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07690165936946869, + "step": 4980, + "valid_targets_mean": 4774.9, + "valid_targets_min": 3663 + }, + { + "epoch": 5.199269692227439, + "grad_norm": 0.4827407467234675, + "learning_rate": 7.554614388284609e-06, + "loss": 0.1503, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07823975384235382, + "step": 4985, + "valid_targets_mean": 6316.8, + "valid_targets_min": 4105 + }, + { + "epoch": 5.204486176317162, + "grad_norm": 0.42699160424738275, + "learning_rate": 7.513947204275453e-06, + "loss": 0.1523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06995242089033127, + "step": 4990, + "valid_targets_mean": 6528.9, + "valid_targets_min": 4537 + }, + { + "epoch": 5.209702660406886, + "grad_norm": 0.4305698440051622, + "learning_rate": 7.473364440588404e-06, + "loss": 0.1411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07087582349777222, + "step": 4995, + "valid_targets_mean": 6416.8, + "valid_targets_min": 2494 + }, + { + "epoch": 5.214919144496609, + "grad_norm": 0.43980244576156524, + "learning_rate": 7.432866371610403e-06, + "loss": 0.1531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07949298620223999, + "step": 5000, + "valid_targets_mean": 6070.9, + "valid_targets_min": 4921 + }, + { + "epoch": 5.220135628586333, + "grad_norm": 0.4412001904025245, + "learning_rate": 7.392453271155786e-06, + "loss": 0.1598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08294803649187088, + "step": 5005, + "valid_targets_mean": 6701.4, + "valid_targets_min": 5285 + }, + { + "epoch": 5.225352112676056, + "grad_norm": 0.4371842686311474, + "learning_rate": 7.352125412464368e-06, + "loss": 0.1455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07258106768131256, + "step": 5010, + "valid_targets_mean": 6273.9, + "valid_targets_min": 4464 + }, + { + "epoch": 5.23056859676578, + "grad_norm": 0.6902819011753237, + "learning_rate": 7.311883068199659e-06, + "loss": 0.1621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07050133496522903, + "step": 5015, + "valid_targets_mean": 5554.8, + "valid_targets_min": 4439 + }, + { + "epoch": 5.2357850808555035, + "grad_norm": 0.42415523389434207, + "learning_rate": 7.271726510446968e-06, + "loss": 0.1428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06306670606136322, + "step": 5020, + "valid_targets_mean": 6043.0, + "valid_targets_min": 4686 + }, + { + "epoch": 5.241001564945227, + "grad_norm": 0.4374637189775283, + "learning_rate": 7.231656010711609e-06, + "loss": 0.133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06594899296760559, + "step": 5025, + "valid_targets_mean": 5912.5, + "valid_targets_min": 3381 + }, + { + "epoch": 5.24621804903495, + "grad_norm": 0.4190882190597131, + "learning_rate": 7.191671839917025e-06, + "loss": 0.1333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06954061985015869, + "step": 5030, + "valid_targets_mean": 6495.5, + "valid_targets_min": 5309 + }, + { + "epoch": 5.251434533124674, + "grad_norm": 0.4392023607160537, + "learning_rate": 7.15177426840298e-06, + "loss": 0.138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07784658670425415, + "step": 5035, + "valid_targets_mean": 6483.9, + "valid_targets_min": 5161 + }, + { + "epoch": 5.256651017214398, + "grad_norm": 0.49318069483285104, + "learning_rate": 7.111963565923723e-06, + "loss": 0.1555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07526691257953644, + "step": 5040, + "valid_targets_mean": 5958.9, + "valid_targets_min": 4666 + }, + { + "epoch": 5.261867501304121, + "grad_norm": 0.6128833078957455, + "learning_rate": 7.07224000164618e-06, + "loss": 0.1774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0994606614112854, + "step": 5045, + "valid_targets_mean": 1691.5, + "valid_targets_min": 1440 + }, + { + "epoch": 5.267083985393844, + "grad_norm": 0.44655319756171524, + "learning_rate": 7.032603844148098e-06, + "loss": 0.1661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06736750900745392, + "step": 5050, + "valid_targets_mean": 5299.1, + "valid_targets_min": 4547 + }, + { + "epoch": 5.272300469483568, + "grad_norm": 0.4578673452088674, + "learning_rate": 6.993055361416281e-06, + "loss": 0.1444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07548375427722931, + "step": 5055, + "valid_targets_mean": 6003.5, + "valid_targets_min": 5031 + }, + { + "epoch": 5.277516953573292, + "grad_norm": 0.4580233374352607, + "learning_rate": 6.953594820844725e-06, + "loss": 0.1531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07152295857667923, + "step": 5060, + "valid_targets_mean": 8169.6, + "valid_targets_min": 5487 + }, + { + "epoch": 5.2827334376630155, + "grad_norm": 0.4469891023849917, + "learning_rate": 6.914222489232834e-06, + "loss": 0.1575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0720517486333847, + "step": 5065, + "valid_targets_mean": 6474.9, + "valid_targets_min": 4253 + }, + { + "epoch": 5.287949921752738, + "grad_norm": 0.47486500697220446, + "learning_rate": 6.874938632783639e-06, + "loss": 0.1659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08745107054710388, + "step": 5070, + "valid_targets_mean": 7021.0, + "valid_targets_min": 4990 + }, + { + "epoch": 5.293166405842462, + "grad_norm": 0.47774234672994254, + "learning_rate": 6.835743517101947e-06, + "loss": 0.1531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09182476997375488, + "step": 5075, + "valid_targets_mean": 6356.0, + "valid_targets_min": 4385 + }, + { + "epoch": 5.298382889932186, + "grad_norm": 0.4306928377605481, + "learning_rate": 6.796637407192608e-06, + "loss": 0.1565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08341844379901886, + "step": 5080, + "valid_targets_mean": 6815.1, + "valid_targets_min": 5164 + }, + { + "epoch": 5.30359937402191, + "grad_norm": 0.4211095017386481, + "learning_rate": 6.7576205674586405e-06, + "loss": 0.1476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05625376105308533, + "step": 5085, + "valid_targets_mean": 5588.5, + "valid_targets_min": 4098 + }, + { + "epoch": 5.3088158581116325, + "grad_norm": 0.428788129206361, + "learning_rate": 6.718693261699542e-06, + "loss": 0.1426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07674535363912582, + "step": 5090, + "valid_targets_mean": 6167.6, + "valid_targets_min": 4801 + }, + { + "epoch": 5.314032342201356, + "grad_norm": 0.4559952377626588, + "learning_rate": 6.679855753109419e-06, + "loss": 0.175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18360626697540283, + "step": 5095, + "valid_targets_mean": 6224.9, + "valid_targets_min": 4772 + }, + { + "epoch": 5.31924882629108, + "grad_norm": 0.7114170034392329, + "learning_rate": 6.64110830427527e-06, + "loss": 0.1732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0870460569858551, + "step": 5100, + "valid_targets_mean": 6549.2, + "valid_targets_min": 5025 + }, + { + "epoch": 5.324465310380804, + "grad_norm": 0.887238582241089, + "learning_rate": 6.602451177175162e-06, + "loss": 0.1425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08601780235767365, + "step": 5105, + "valid_targets_mean": 6680.8, + "valid_targets_min": 4818 + }, + { + "epoch": 5.329681794470527, + "grad_norm": 0.4702882140247903, + "learning_rate": 6.563884633176505e-06, + "loss": 0.1535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07530678808689117, + "step": 5110, + "valid_targets_mean": 5835.1, + "valid_targets_min": 4111 + }, + { + "epoch": 5.33489827856025, + "grad_norm": 0.4338907228746573, + "learning_rate": 6.5254089330342366e-06, + "loss": 0.1559, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0734003484249115, + "step": 5115, + "valid_targets_mean": 7029.9, + "valid_targets_min": 2275 + }, + { + "epoch": 5.340114762649974, + "grad_norm": 0.42055178751012134, + "learning_rate": 6.487024336889107e-06, + "loss": 0.136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06461232900619507, + "step": 5120, + "valid_targets_mean": 5621.2, + "valid_targets_min": 4826 + }, + { + "epoch": 5.345331246739698, + "grad_norm": 0.45104523949110753, + "learning_rate": 6.448731104265871e-06, + "loss": 0.1468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06972408294677734, + "step": 5125, + "valid_targets_mean": 6858.8, + "valid_targets_min": 4710 + }, + { + "epoch": 5.350547730829421, + "grad_norm": 0.40764790764878683, + "learning_rate": 6.410529494071596e-06, + "loss": 0.1368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10459072887897491, + "step": 5130, + "valid_targets_mean": 7012.5, + "valid_targets_min": 5421 + }, + { + "epoch": 5.3557642149191445, + "grad_norm": 0.43164427115924714, + "learning_rate": 6.372419764593825e-06, + "loss": 0.1434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0683847963809967, + "step": 5135, + "valid_targets_mean": 6967.6, + "valid_targets_min": 4949 + }, + { + "epoch": 5.360980699008868, + "grad_norm": 0.43819375671948196, + "learning_rate": 6.334402173498926e-06, + "loss": 0.1415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07439292967319489, + "step": 5140, + "valid_targets_mean": 6348.5, + "valid_targets_min": 4831 + }, + { + "epoch": 5.366197183098592, + "grad_norm": 0.4462274200266351, + "learning_rate": 6.296476977830272e-06, + "loss": 0.1505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09430162608623505, + "step": 5145, + "valid_targets_mean": 6597.2, + "valid_targets_min": 4018 + }, + { + "epoch": 5.371413667188315, + "grad_norm": 0.3535845546411115, + "learning_rate": 6.2586444340065625e-06, + "loss": 0.1409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.052067432552576065, + "step": 5150, + "valid_targets_mean": 6420.9, + "valid_targets_min": 5034 + }, + { + "epoch": 5.376630151278039, + "grad_norm": 0.4287372156160583, + "learning_rate": 6.22090479782004e-06, + "loss": 0.1431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08518033474683762, + "step": 5155, + "valid_targets_mean": 6962.4, + "valid_targets_min": 5442 + }, + { + "epoch": 5.381846635367762, + "grad_norm": 0.5005056850864263, + "learning_rate": 6.18325832443478e-06, + "loss": 0.1563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07817043364048004, + "step": 5160, + "valid_targets_mean": 5610.9, + "valid_targets_min": 4609 + }, + { + "epoch": 5.387063119457486, + "grad_norm": 0.5347516925106713, + "learning_rate": 6.145705268384996e-06, + "loss": 0.1563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07606935501098633, + "step": 5165, + "valid_targets_mean": 5038.4, + "valid_targets_min": 2301 + }, + { + "epoch": 5.392279603547209, + "grad_norm": 0.5880389786630846, + "learning_rate": 6.108245883573258e-06, + "loss": 0.1976, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10076425969600677, + "step": 5170, + "valid_targets_mean": 4602.4, + "valid_targets_min": 702 + }, + { + "epoch": 5.397496087636933, + "grad_norm": 1.8736200752775178, + "learning_rate": 6.070880423268839e-06, + "loss": 0.213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1282118558883667, + "step": 5175, + "valid_targets_mean": 4814.2, + "valid_targets_min": 1005 + }, + { + "epoch": 5.402712571726656, + "grad_norm": 0.5706588756380075, + "learning_rate": 6.033609140105949e-06, + "loss": 0.2086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08292192220687866, + "step": 5180, + "valid_targets_mean": 4273.4, + "valid_targets_min": 2054 + }, + { + "epoch": 5.40792905581638, + "grad_norm": 0.5981424024910736, + "learning_rate": 5.996432286082061e-06, + "loss": 0.2079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08246517926454544, + "step": 5185, + "valid_targets_mean": 3367.4, + "valid_targets_min": 1561 + }, + { + "epoch": 5.413145539906103, + "grad_norm": 0.583910953320807, + "learning_rate": 5.9593501125561885e-06, + "loss": 0.2135, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13011981546878815, + "step": 5190, + "valid_targets_mean": 5055.8, + "valid_targets_min": 1970 + }, + { + "epoch": 5.418362023995827, + "grad_norm": 0.5033332617763896, + "learning_rate": 5.922362870247214e-06, + "loss": 0.2188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1124546080827713, + "step": 5195, + "valid_targets_mean": 6716.9, + "valid_targets_min": 1651 + }, + { + "epoch": 5.4235785080855505, + "grad_norm": 0.6944546561230228, + "learning_rate": 5.885470809232143e-06, + "loss": 0.2103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11158231645822525, + "step": 5200, + "valid_targets_mean": 6468.5, + "valid_targets_min": 2231 + }, + { + "epoch": 5.428794992175274, + "grad_norm": 0.5682063644909414, + "learning_rate": 5.8486741789444804e-06, + "loss": 0.2053, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08298724889755249, + "step": 5205, + "valid_targets_mean": 3683.6, + "valid_targets_min": 467 + }, + { + "epoch": 5.434011476264997, + "grad_norm": 0.5538929488415794, + "learning_rate": 5.8119732281724715e-06, + "loss": 0.2134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12869033217430115, + "step": 5210, + "valid_targets_mean": 6501.8, + "valid_targets_min": 1748 + }, + { + "epoch": 5.439227960354721, + "grad_norm": 0.5727868995548158, + "learning_rate": 5.775368205057488e-06, + "loss": 0.2127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10288283228874207, + "step": 5215, + "valid_targets_mean": 4381.8, + "valid_targets_min": 1697 + }, + { + "epoch": 5.444444444444445, + "grad_norm": 0.6680234165511143, + "learning_rate": 5.738859357092297e-06, + "loss": 0.2072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08974325656890869, + "step": 5220, + "valid_targets_mean": 2841.9, + "valid_targets_min": 1223 + }, + { + "epoch": 5.449660928534168, + "grad_norm": 0.6192232474783773, + "learning_rate": 5.7024469311194095e-06, + "loss": 0.2104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11188208311796188, + "step": 5225, + "valid_targets_mean": 5914.2, + "valid_targets_min": 1552 + }, + { + "epoch": 5.454877412623891, + "grad_norm": 0.6840312135314707, + "learning_rate": 5.66613117332943e-06, + "loss": 0.2095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09818445146083832, + "step": 5230, + "valid_targets_mean": 3926.2, + "valid_targets_min": 1546 + }, + { + "epoch": 5.460093896713615, + "grad_norm": 0.5972496961130105, + "learning_rate": 5.629912329259355e-06, + "loss": 0.2073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10787554085254669, + "step": 5235, + "valid_targets_mean": 4878.8, + "valid_targets_min": 2362 + }, + { + "epoch": 5.465310380803339, + "grad_norm": 0.5793385257245801, + "learning_rate": 5.593790643790935e-06, + "loss": 0.2048, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09302453696727753, + "step": 5240, + "valid_targets_mean": 3567.4, + "valid_targets_min": 1538 + }, + { + "epoch": 5.470526864893062, + "grad_norm": 0.677131423968468, + "learning_rate": 5.557766361149013e-06, + "loss": 0.203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10073326528072357, + "step": 5245, + "valid_targets_mean": 3454.9, + "valid_targets_min": 1967 + }, + { + "epoch": 5.475743348982785, + "grad_norm": 0.6920698229080083, + "learning_rate": 5.521839724899887e-06, + "loss": 0.2024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0909716933965683, + "step": 5250, + "valid_targets_mean": 2960.2, + "valid_targets_min": 935 + }, + { + "epoch": 5.480959833072509, + "grad_norm": 0.7004255553618203, + "learning_rate": 5.48601097794963e-06, + "loss": 0.2123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.120552659034729, + "step": 5255, + "valid_targets_mean": 3945.8, + "valid_targets_min": 2704 + }, + { + "epoch": 5.486176317162233, + "grad_norm": 0.7112365259035549, + "learning_rate": 5.450280362542495e-06, + "loss": 0.2053, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08641450107097626, + "step": 5260, + "valid_targets_mean": 2315.9, + "valid_targets_min": 1173 + }, + { + "epoch": 5.491392801251957, + "grad_norm": 0.7165973670456973, + "learning_rate": 5.414648120259225e-06, + "loss": 0.2057, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10411398112773895, + "step": 5265, + "valid_targets_mean": 4287.9, + "valid_targets_min": 1670 + }, + { + "epoch": 5.4966092853416795, + "grad_norm": 0.714713212771657, + "learning_rate": 5.379114492015467e-06, + "loss": 0.2018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09117931127548218, + "step": 5270, + "valid_targets_mean": 3211.1, + "valid_targets_min": 1329 + }, + { + "epoch": 5.501825769431403, + "grad_norm": 0.7696384683017399, + "learning_rate": 5.343679718060104e-06, + "loss": 0.1967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10997160524129868, + "step": 5275, + "valid_targets_mean": 3514.5, + "valid_targets_min": 2106 + }, + { + "epoch": 5.507042253521127, + "grad_norm": 0.5942220030942619, + "learning_rate": 5.308344037973672e-06, + "loss": 0.1856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09099546819925308, + "step": 5280, + "valid_targets_mean": 4602.6, + "valid_targets_min": 2222 + }, + { + "epoch": 5.51225873761085, + "grad_norm": 0.6449107869464846, + "learning_rate": 5.2731076906666786e-06, + "loss": 0.1978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09503896534442902, + "step": 5285, + "valid_targets_mean": 3499.4, + "valid_targets_min": 1565 + }, + { + "epoch": 5.517475221700574, + "grad_norm": 0.6294707399742935, + "learning_rate": 5.237970914378068e-06, + "loss": 0.2026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09718786180019379, + "step": 5290, + "valid_targets_mean": 3692.0, + "valid_targets_min": 1951 + }, + { + "epoch": 5.522691705790297, + "grad_norm": 0.6320834703879888, + "learning_rate": 5.202933946673532e-06, + "loss": 0.2083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1048717275261879, + "step": 5295, + "valid_targets_mean": 4503.1, + "valid_targets_min": 2450 + }, + { + "epoch": 5.527908189880021, + "grad_norm": 0.7498769452458843, + "learning_rate": 5.1679970244439695e-06, + "loss": 0.1908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08454498648643494, + "step": 5300, + "valid_targets_mean": 2662.9, + "valid_targets_min": 997 + }, + { + "epoch": 5.533124673969745, + "grad_norm": 0.6310808287636535, + "learning_rate": 5.13316038390383e-06, + "loss": 0.194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09482082724571228, + "step": 5305, + "valid_targets_mean": 4197.1, + "valid_targets_min": 3181 + }, + { + "epoch": 5.538341158059468, + "grad_norm": 0.6913173468149643, + "learning_rate": 5.098424260589565e-06, + "loss": 0.1996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09612376242876053, + "step": 5310, + "valid_targets_mean": 3967.5, + "valid_targets_min": 863 + }, + { + "epoch": 5.5435576421491914, + "grad_norm": 0.6459341138195221, + "learning_rate": 5.063788889357995e-06, + "loss": 0.1941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09091197699308395, + "step": 5315, + "valid_targets_mean": 3194.8, + "valid_targets_min": 1410 + }, + { + "epoch": 5.548774126238915, + "grad_norm": 0.621151226597146, + "learning_rate": 5.029254504384733e-06, + "loss": 0.186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08653881400823593, + "step": 5320, + "valid_targets_mean": 4585.5, + "valid_targets_min": 2110 + }, + { + "epoch": 5.553990610328638, + "grad_norm": 0.7044776724708862, + "learning_rate": 4.9948213391626325e-06, + "loss": 0.1999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07878439128398895, + "step": 5325, + "valid_targets_mean": 3022.8, + "valid_targets_min": 1318 + }, + { + "epoch": 5.559207094418362, + "grad_norm": 0.6359675231368914, + "learning_rate": 4.960489626500153e-06, + "loss": 0.1979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09040946513414383, + "step": 5330, + "valid_targets_mean": 3901.4, + "valid_targets_min": 1876 + }, + { + "epoch": 5.5644235785080856, + "grad_norm": 0.6704210138302098, + "learning_rate": 4.926259598519851e-06, + "loss": 0.1909, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11020966619253159, + "step": 5335, + "valid_targets_mean": 3791.0, + "valid_targets_min": 2605 + }, + { + "epoch": 5.569640062597809, + "grad_norm": 0.6562796916087641, + "learning_rate": 4.892131486656733e-06, + "loss": 0.1984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0890621542930603, + "step": 5340, + "valid_targets_mean": 3304.4, + "valid_targets_min": 2018 + }, + { + "epoch": 5.574856546687533, + "grad_norm": 0.6562140501070438, + "learning_rate": 4.858105521656768e-06, + "loss": 0.1974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09382957965135574, + "step": 5345, + "valid_targets_mean": 3980.9, + "valid_targets_min": 1714 + }, + { + "epoch": 5.580073030777256, + "grad_norm": 0.70327061104374, + "learning_rate": 4.824181933575272e-06, + "loss": 0.1987, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10688737779855728, + "step": 5350, + "valid_targets_mean": 5640.4, + "valid_targets_min": 2033 + }, + { + "epoch": 5.58528951486698, + "grad_norm": 0.6935323659027046, + "learning_rate": 4.790360951775392e-06, + "loss": 0.1912, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11016125977039337, + "step": 5355, + "valid_targets_mean": 3889.9, + "valid_targets_min": 1924 + }, + { + "epoch": 5.590505998956703, + "grad_norm": 0.6698030574711339, + "learning_rate": 4.756642804926517e-06, + "loss": 0.2048, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10112692415714264, + "step": 5360, + "valid_targets_mean": 3436.1, + "valid_targets_min": 2098 + }, + { + "epoch": 5.595722483046426, + "grad_norm": 0.7494285010427829, + "learning_rate": 4.7230277210027685e-06, + "loss": 0.2038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15430939197540283, + "step": 5365, + "valid_targets_mean": 4295.0, + "valid_targets_min": 1363 + }, + { + "epoch": 5.60093896713615, + "grad_norm": 0.8354673673208484, + "learning_rate": 4.689515927281427e-06, + "loss": 0.1876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10171963274478912, + "step": 5370, + "valid_targets_mean": 3449.4, + "valid_targets_min": 1087 + }, + { + "epoch": 5.606155451225874, + "grad_norm": 0.6973196366609759, + "learning_rate": 4.6561076503414235e-06, + "loss": 0.2003, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1170945018529892, + "step": 5375, + "valid_targets_mean": 4451.1, + "valid_targets_min": 2483 + }, + { + "epoch": 5.6113719353155975, + "grad_norm": 0.6771329232451911, + "learning_rate": 4.622803116061789e-06, + "loss": 0.1996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10003726184368134, + "step": 5380, + "valid_targets_mean": 3770.6, + "valid_targets_min": 2316 + }, + { + "epoch": 5.616588419405321, + "grad_norm": 0.6749056892798497, + "learning_rate": 4.589602549620127e-06, + "loss": 0.1944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09864921867847443, + "step": 5385, + "valid_targets_mean": 3776.6, + "valid_targets_min": 2022 + }, + { + "epoch": 5.621804903495044, + "grad_norm": 0.6381986247742556, + "learning_rate": 4.556506175491097e-06, + "loss": 0.1872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06832664459943771, + "step": 5390, + "valid_targets_mean": 2919.2, + "valid_targets_min": 1294 + }, + { + "epoch": 5.627021387584768, + "grad_norm": 0.7730125946316095, + "learning_rate": 4.523514217444918e-06, + "loss": 0.2032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11474058032035828, + "step": 5395, + "valid_targets_mean": 3461.2, + "valid_targets_min": 1700 + }, + { + "epoch": 5.632237871674492, + "grad_norm": 0.7338700074521602, + "learning_rate": 4.490626898545805e-06, + "loss": 0.1975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09719344973564148, + "step": 5400, + "valid_targets_mean": 2870.9, + "valid_targets_min": 1586 + }, + { + "epoch": 5.6374543557642145, + "grad_norm": 0.7349890117191052, + "learning_rate": 4.4578444411505005e-06, + "loss": 0.1926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10376821458339691, + "step": 5405, + "valid_targets_mean": 3219.4, + "valid_targets_min": 1358 + }, + { + "epoch": 5.642670839853938, + "grad_norm": 0.676215226763973, + "learning_rate": 4.425167066906777e-06, + "loss": 0.199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10248099267482758, + "step": 5410, + "valid_targets_mean": 3983.4, + "valid_targets_min": 2290 + }, + { + "epoch": 5.647887323943662, + "grad_norm": 0.6726146570731603, + "learning_rate": 4.392594996751891e-06, + "loss": 0.1988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12992066144943237, + "step": 5415, + "valid_targets_mean": 5156.4, + "valid_targets_min": 2189 + }, + { + "epoch": 5.653103808033386, + "grad_norm": 0.6837591697476089, + "learning_rate": 4.360128450911154e-06, + "loss": 0.1954, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11537463217973709, + "step": 5420, + "valid_targets_mean": 4121.2, + "valid_targets_min": 2121 + }, + { + "epoch": 5.6583202921231095, + "grad_norm": 0.7230193860224345, + "learning_rate": 4.3277676488963775e-06, + "loss": 0.2027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13380834460258484, + "step": 5425, + "valid_targets_mean": 4899.4, + "valid_targets_min": 2011 + }, + { + "epoch": 5.663536776212832, + "grad_norm": 0.777999651822206, + "learning_rate": 4.295512809504447e-06, + "loss": 0.1887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08464520424604416, + "step": 5430, + "valid_targets_mean": 3752.9, + "valid_targets_min": 2674 + }, + { + "epoch": 5.668753260302556, + "grad_norm": 0.662197303864658, + "learning_rate": 4.263364150815803e-06, + "loss": 0.1859, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11234456300735474, + "step": 5435, + "valid_targets_mean": 3976.4, + "valid_targets_min": 2266 + }, + { + "epoch": 5.67396974439228, + "grad_norm": 0.6197205513217392, + "learning_rate": 4.231321890192981e-06, + "loss": 0.1895, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08293233811855316, + "step": 5440, + "valid_targets_mean": 4403.5, + "valid_targets_min": 904 + }, + { + "epoch": 5.679186228482003, + "grad_norm": 0.7408629220283659, + "learning_rate": 4.19938624427914e-06, + "loss": 0.192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13740897178649902, + "step": 5445, + "valid_targets_mean": 4388.2, + "valid_targets_min": 1219 + }, + { + "epoch": 5.6844027125717265, + "grad_norm": 0.6599545149018337, + "learning_rate": 4.167557428996611e-06, + "loss": 0.1963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0836276113986969, + "step": 5450, + "valid_targets_mean": 3888.8, + "valid_targets_min": 1658 + }, + { + "epoch": 5.68961919666145, + "grad_norm": 0.7123275791545848, + "learning_rate": 4.135835659545406e-06, + "loss": 0.2006, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09113888442516327, + "step": 5455, + "valid_targets_mean": 3065.0, + "valid_targets_min": 1423 + }, + { + "epoch": 5.694835680751174, + "grad_norm": 0.6662491369255369, + "learning_rate": 4.104221150401806e-06, + "loss": 0.1947, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08419522643089294, + "step": 5460, + "valid_targets_mean": 3947.2, + "valid_targets_min": 2320 + }, + { + "epoch": 5.700052164840898, + "grad_norm": 0.6868579041923284, + "learning_rate": 4.072714115316863e-06, + "loss": 0.1989, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0934920608997345, + "step": 5465, + "valid_targets_mean": 3820.2, + "valid_targets_min": 1811 + }, + { + "epoch": 5.705268648930621, + "grad_norm": 0.6396666234800631, + "learning_rate": 4.041314767314983e-06, + "loss": 0.2002, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08919529616832733, + "step": 5470, + "valid_targets_mean": 3953.8, + "valid_targets_min": 952 + }, + { + "epoch": 5.710485133020344, + "grad_norm": 0.7120972275909109, + "learning_rate": 4.010023318692502e-06, + "loss": 0.1955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08941976726055145, + "step": 5475, + "valid_targets_mean": 2668.1, + "valid_targets_min": 1364 + }, + { + "epoch": 5.715701617110068, + "grad_norm": 0.686071052915526, + "learning_rate": 3.978839981016203e-06, + "loss": 0.1923, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10472702234983444, + "step": 5480, + "valid_targets_mean": 3530.9, + "valid_targets_min": 1397 + }, + { + "epoch": 5.720918101199791, + "grad_norm": 0.6697782901090433, + "learning_rate": 3.947764965121934e-06, + "loss": 0.1829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09821496903896332, + "step": 5485, + "valid_targets_mean": 4327.6, + "valid_targets_min": 1415 + }, + { + "epoch": 5.726134585289515, + "grad_norm": 0.6612668679771639, + "learning_rate": 3.916798481113144e-06, + "loss": 0.1833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09757659584283829, + "step": 5490, + "valid_targets_mean": 3737.0, + "valid_targets_min": 621 + }, + { + "epoch": 5.731351069379238, + "grad_norm": 0.8061145527588353, + "learning_rate": 3.885940738359492e-06, + "loss": 0.1809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07764957845211029, + "step": 5495, + "valid_targets_mean": 3610.9, + "valid_targets_min": 1290 + }, + { + "epoch": 5.736567553468962, + "grad_norm": 0.7423557514215533, + "learning_rate": 3.855191945495405e-06, + "loss": 0.1799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07822816073894501, + "step": 5500, + "valid_targets_mean": 2158.9, + "valid_targets_min": 642 + }, + { + "epoch": 5.741784037558686, + "grad_norm": 0.7116805103743424, + "learning_rate": 3.824552310418703e-06, + "loss": 0.1784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09957093745470047, + "step": 5505, + "valid_targets_mean": 3336.2, + "valid_targets_min": 2109 + }, + { + "epoch": 5.747000521648409, + "grad_norm": 0.9436105939103613, + "learning_rate": 3.794022040289147e-06, + "loss": 0.1922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12506654858589172, + "step": 5510, + "valid_targets_mean": 4459.6, + "valid_targets_min": 2590 + }, + { + "epoch": 5.7522170057381325, + "grad_norm": 0.6761433351365501, + "learning_rate": 3.763601341527088e-06, + "loss": 0.2045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08072531968355179, + "step": 5515, + "valid_targets_mean": 3302.4, + "valid_targets_min": 1510 + }, + { + "epoch": 5.757433489827856, + "grad_norm": 0.7509038121845254, + "learning_rate": 3.733290419812019e-06, + "loss": 0.1853, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11254091560840607, + "step": 5520, + "valid_targets_mean": 3270.5, + "valid_targets_min": 987 + }, + { + "epoch": 5.762649973917579, + "grad_norm": 0.6762166925969065, + "learning_rate": 3.7030894800812365e-06, + "loss": 0.1998, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09430418163537979, + "step": 5525, + "valid_targets_mean": 3920.6, + "valid_targets_min": 1609 + }, + { + "epoch": 5.767866458007303, + "grad_norm": 0.6389502180208496, + "learning_rate": 3.672998726528414e-06, + "loss": 0.1863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08114771544933319, + "step": 5530, + "valid_targets_mean": 3647.4, + "valid_targets_min": 1700 + }, + { + "epoch": 5.773082942097027, + "grad_norm": 0.5801672441657589, + "learning_rate": 3.6430183626022574e-06, + "loss": 0.1897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08160527050495148, + "step": 5535, + "valid_targets_mean": 5263.6, + "valid_targets_min": 1767 + }, + { + "epoch": 5.77829942618675, + "grad_norm": 0.5978884618834638, + "learning_rate": 3.613148591005071e-06, + "loss": 0.1903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0727209746837616, + "step": 5540, + "valid_targets_mean": 3840.6, + "valid_targets_min": 1511 + }, + { + "epoch": 5.783515910276473, + "grad_norm": 0.7935236203030991, + "learning_rate": 3.5833896136914705e-06, + "loss": 0.1867, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07489004731178284, + "step": 5545, + "valid_targets_mean": 2503.0, + "valid_targets_min": 974 + }, + { + "epoch": 5.788732394366197, + "grad_norm": 0.6580061553709526, + "learning_rate": 3.553741631866938e-06, + "loss": 0.1901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07514528930187225, + "step": 5550, + "valid_targets_mean": 3380.5, + "valid_targets_min": 1476 + }, + { + "epoch": 5.793948878455921, + "grad_norm": 0.7068459296871152, + "learning_rate": 3.524204845986523e-06, + "loss": 0.1862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08917607367038727, + "step": 5555, + "valid_targets_mean": 3563.6, + "valid_targets_min": 1249 + }, + { + "epoch": 5.7991653625456445, + "grad_norm": 0.6729624544949937, + "learning_rate": 3.494779455753443e-06, + "loss": 0.1934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08329454064369202, + "step": 5560, + "valid_targets_mean": 3249.8, + "valid_targets_min": 1669 + }, + { + "epoch": 5.804381846635367, + "grad_norm": 0.7727834549009607, + "learning_rate": 3.4654656601177482e-06, + "loss": 0.2057, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09545086324214935, + "step": 5565, + "valid_targets_mean": 2861.2, + "valid_targets_min": 1318 + }, + { + "epoch": 5.809598330725091, + "grad_norm": 0.7793091095869094, + "learning_rate": 3.4362636572749984e-06, + "loss": 0.1935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0952250063419342, + "step": 5570, + "valid_targets_mean": 2206.4, + "valid_targets_min": 1002 + }, + { + "epoch": 5.814814814814815, + "grad_norm": 0.7231082645029193, + "learning_rate": 3.4071736446648805e-06, + "loss": 0.2102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07934655249118805, + "step": 5575, + "valid_targets_mean": 2393.9, + "valid_targets_min": 845 + }, + { + "epoch": 5.820031298904539, + "grad_norm": 0.6197207666137181, + "learning_rate": 3.3781958189699183e-06, + "loss": 0.1884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10013750195503235, + "step": 5580, + "valid_targets_mean": 4607.1, + "valid_targets_min": 2611 + }, + { + "epoch": 5.8252477829942615, + "grad_norm": 0.7214964921447167, + "learning_rate": 3.3493303761141016e-06, + "loss": 0.2111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09076010435819626, + "step": 5585, + "valid_targets_mean": 3470.9, + "valid_targets_min": 1241 + }, + { + "epoch": 5.830464267083985, + "grad_norm": 0.7593264061302211, + "learning_rate": 3.320577511261589e-06, + "loss": 0.2034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11610162258148193, + "step": 5590, + "valid_targets_mean": 3474.2, + "valid_targets_min": 992 + }, + { + "epoch": 5.835680751173709, + "grad_norm": 0.6710199742506909, + "learning_rate": 3.291937418815376e-06, + "loss": 0.1948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09124165028333664, + "step": 5595, + "valid_targets_mean": 3450.8, + "valid_targets_min": 1133 + }, + { + "epoch": 5.840897235263433, + "grad_norm": 0.7148794775132431, + "learning_rate": 3.2634102924159982e-06, + "loss": 0.1961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09886611998081207, + "step": 5600, + "valid_targets_mean": 3689.0, + "valid_targets_min": 2256 + }, + { + "epoch": 5.846113719353156, + "grad_norm": 0.6877019139900377, + "learning_rate": 3.2349963249401894e-06, + "loss": 0.1992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08637898415327072, + "step": 5605, + "valid_targets_mean": 4224.8, + "valid_targets_min": 1919 + }, + { + "epoch": 5.851330203442879, + "grad_norm": 0.7524780220022054, + "learning_rate": 3.2066957084996163e-06, + "loss": 0.1968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09474893659353256, + "step": 5610, + "valid_targets_mean": 3339.4, + "valid_targets_min": 2763 + }, + { + "epoch": 5.856546687532603, + "grad_norm": 0.7673243014347821, + "learning_rate": 3.178508634439539e-06, + "loss": 0.1998, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09032605588436127, + "step": 5615, + "valid_targets_mean": 2628.4, + "valid_targets_min": 617 + }, + { + "epoch": 5.861763171622327, + "grad_norm": 0.7595910170563226, + "learning_rate": 3.150435293337557e-06, + "loss": 0.1856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09978964179754257, + "step": 5620, + "valid_targets_mean": 4144.9, + "valid_targets_min": 3147 + }, + { + "epoch": 5.86697965571205, + "grad_norm": 0.6528892022429073, + "learning_rate": 3.1224758750022934e-06, + "loss": 0.1885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09148497879505157, + "step": 5625, + "valid_targets_mean": 3559.9, + "valid_targets_min": 1844 + }, + { + "epoch": 5.8721961398017735, + "grad_norm": 0.6745813176114499, + "learning_rate": 3.0946305684721145e-06, + "loss": 0.193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09244602918624878, + "step": 5630, + "valid_targets_mean": 4051.6, + "valid_targets_min": 2330 + }, + { + "epoch": 5.877412623891497, + "grad_norm": 0.7808613750846146, + "learning_rate": 3.066899562013872e-06, + "loss": 0.2008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11704682558774948, + "step": 5635, + "valid_targets_mean": 3475.0, + "valid_targets_min": 1066 + }, + { + "epoch": 5.882629107981221, + "grad_norm": 0.6246287400250787, + "learning_rate": 3.0392830431216037e-06, + "loss": 0.1936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09796503186225891, + "step": 5640, + "valid_targets_mean": 4077.0, + "valid_targets_min": 1713 + }, + { + "epoch": 5.887845592070944, + "grad_norm": 0.7117147519216562, + "learning_rate": 3.01178119851528e-06, + "loss": 0.2006, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11242762953042984, + "step": 5645, + "valid_targets_mean": 3838.5, + "valid_targets_min": 981 + }, + { + "epoch": 5.893062076160668, + "grad_norm": 0.6523910203555894, + "learning_rate": 2.9843942141395365e-06, + "loss": 0.197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11463186144828796, + "step": 5650, + "valid_targets_mean": 4425.5, + "valid_targets_min": 3325 + }, + { + "epoch": 5.898278560250391, + "grad_norm": 0.9436670376107035, + "learning_rate": 2.9571222751624317e-06, + "loss": 0.1993, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11222006380558014, + "step": 5655, + "valid_targets_mean": 3307.1, + "valid_targets_min": 1525 + }, + { + "epoch": 5.903495044340115, + "grad_norm": 0.6289825692395081, + "learning_rate": 2.9299655659741622e-06, + "loss": 0.1967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09087878465652466, + "step": 5660, + "valid_targets_mean": 3937.9, + "valid_targets_min": 2273 + }, + { + "epoch": 5.908711528429838, + "grad_norm": 0.8119757858958936, + "learning_rate": 2.9029242701858606e-06, + "loss": 0.1914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08784656226634979, + "step": 5665, + "valid_targets_mean": 3462.4, + "valid_targets_min": 2106 + }, + { + "epoch": 5.913928012519562, + "grad_norm": 0.6670178193934468, + "learning_rate": 2.8759985706283068e-06, + "loss": 0.1858, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08142383396625519, + "step": 5670, + "valid_targets_mean": 3902.1, + "valid_targets_min": 1687 + }, + { + "epoch": 5.919144496609285, + "grad_norm": 0.6870263539475759, + "learning_rate": 2.8491886493507313e-06, + "loss": 0.1936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1019984781742096, + "step": 5675, + "valid_targets_mean": 3604.1, + "valid_targets_min": 2246 + }, + { + "epoch": 5.924360980699009, + "grad_norm": 0.6837627185584352, + "learning_rate": 2.8224946876195593e-06, + "loss": 0.1899, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08432202786207199, + "step": 5680, + "valid_targets_mean": 3352.8, + "valid_targets_min": 906 + }, + { + "epoch": 5.929577464788732, + "grad_norm": 0.6950158629807149, + "learning_rate": 2.795916865917201e-06, + "loss": 0.1881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11337006837129593, + "step": 5685, + "valid_targets_mean": 4221.8, + "valid_targets_min": 1874 + }, + { + "epoch": 5.934793948878456, + "grad_norm": 0.6279835561380916, + "learning_rate": 2.7694553639408163e-06, + "loss": 0.1821, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10258927941322327, + "step": 5690, + "valid_targets_mean": 4002.5, + "valid_targets_min": 2145 + }, + { + "epoch": 5.9400104329681795, + "grad_norm": 0.6470929529084897, + "learning_rate": 2.7431103606011113e-06, + "loss": 0.1869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09089256078004837, + "step": 5695, + "valid_targets_mean": 3473.2, + "valid_targets_min": 1697 + }, + { + "epoch": 5.945226917057903, + "grad_norm": 0.7166494540435914, + "learning_rate": 2.71688203402112e-06, + "loss": 0.1883, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09629033505916595, + "step": 5700, + "valid_targets_mean": 2876.6, + "valid_targets_min": 897 + }, + { + "epoch": 5.950443401147626, + "grad_norm": 0.7798529478722462, + "learning_rate": 2.690770561535019e-06, + "loss": 0.207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09422819316387177, + "step": 5705, + "valid_targets_mean": 3168.4, + "valid_targets_min": 1695 + }, + { + "epoch": 5.95565988523735, + "grad_norm": 0.9590974740379314, + "learning_rate": 2.664776119686896e-06, + "loss": 0.1933, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09302680939435959, + "step": 5710, + "valid_targets_mean": 3473.4, + "valid_targets_min": 1712 + }, + { + "epoch": 5.960876369327074, + "grad_norm": 0.6939700799851168, + "learning_rate": 2.6388988842295947e-06, + "loss": 0.1877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07662299275398254, + "step": 5715, + "valid_targets_mean": 2963.9, + "valid_targets_min": 993 + }, + { + "epoch": 5.966092853416797, + "grad_norm": 0.7254880710187078, + "learning_rate": 2.6131390301234927e-06, + "loss": 0.1829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09925804287195206, + "step": 5720, + "valid_targets_mean": 4306.5, + "valid_targets_min": 2027 + }, + { + "epoch": 5.97130933750652, + "grad_norm": 0.6379597367289886, + "learning_rate": 2.587496731535326e-06, + "loss": 0.1935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09103129804134369, + "step": 5725, + "valid_targets_mean": 4049.0, + "valid_targets_min": 2250 + }, + { + "epoch": 5.976525821596244, + "grad_norm": 0.7461288471266831, + "learning_rate": 2.561972161837041e-06, + "loss": 0.1991, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09840848296880722, + "step": 5730, + "valid_targets_mean": 4258.1, + "valid_targets_min": 2892 + }, + { + "epoch": 5.981742305685968, + "grad_norm": 0.6883258749050553, + "learning_rate": 2.536565493604575e-06, + "loss": 0.1985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11052220314741135, + "step": 5735, + "valid_targets_mean": 4290.0, + "valid_targets_min": 2965 + }, + { + "epoch": 5.9869587897756915, + "grad_norm": 0.7067141690071591, + "learning_rate": 2.511276898616737e-06, + "loss": 0.1955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08707703649997711, + "step": 5740, + "valid_targets_mean": 3887.6, + "valid_targets_min": 1817 + }, + { + "epoch": 5.992175273865414, + "grad_norm": 0.7422937669391017, + "learning_rate": 2.4861065478539925e-06, + "loss": 0.1943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11102442443370819, + "step": 5745, + "valid_targets_mean": 4666.2, + "valid_targets_min": 1727 + }, + { + "epoch": 5.997391757955138, + "grad_norm": 0.6469558841650395, + "learning_rate": 2.4610546114973666e-06, + "loss": 0.1928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08849377185106277, + "step": 5750, + "valid_targets_mean": 3806.2, + "valid_targets_min": 1959 + }, + { + "epoch": 6.002086593635889, + "grad_norm": 0.597628436799616, + "learning_rate": 2.4361212589272488e-06, + "loss": 0.1749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0758490115404129, + "step": 5755, + "valid_targets_mean": 5904.0, + "valid_targets_min": 4967 + }, + { + "epoch": 6.007303077725613, + "grad_norm": 0.5441911080013484, + "learning_rate": 2.41130665872227e-06, + "loss": 0.1511, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08887383341789246, + "step": 5760, + "valid_targets_mean": 8641.4, + "valid_targets_min": 5164 + }, + { + "epoch": 6.012519561815337, + "grad_norm": 0.47582903730461884, + "learning_rate": 2.3866109786581484e-06, + "loss": 0.1464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07055404037237167, + "step": 5765, + "valid_targets_mean": 6838.2, + "valid_targets_min": 5200 + }, + { + "epoch": 6.01773604590506, + "grad_norm": 0.5228519103141637, + "learning_rate": 2.3620343857065776e-06, + "loss": 0.1443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05986611545085907, + "step": 5770, + "valid_targets_mean": 5411.6, + "valid_targets_min": 3853 + }, + { + "epoch": 6.022952529994783, + "grad_norm": 0.45010274282681373, + "learning_rate": 2.3375770460340654e-06, + "loss": 0.1507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08203235268592834, + "step": 5775, + "valid_targets_mean": 5598.0, + "valid_targets_min": 4669 + }, + { + "epoch": 6.028169014084507, + "grad_norm": 0.48593455115911577, + "learning_rate": 2.313239125000841e-06, + "loss": 0.1569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07801078259944916, + "step": 5780, + "valid_targets_mean": 5953.0, + "valid_targets_min": 4216 + }, + { + "epoch": 6.033385498174231, + "grad_norm": 0.7311303202570739, + "learning_rate": 2.2890207871597192e-06, + "loss": 0.1513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07807885110378265, + "step": 5785, + "valid_targets_mean": 5914.0, + "valid_targets_min": 4589 + }, + { + "epoch": 6.0386019822639545, + "grad_norm": 0.46166151258811233, + "learning_rate": 2.2649221962549905e-06, + "loss": 0.1449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07456433027982712, + "step": 5790, + "valid_targets_mean": 6515.4, + "valid_targets_min": 4588 + }, + { + "epoch": 6.043818466353677, + "grad_norm": 0.4506755183644086, + "learning_rate": 2.2409435152213123e-06, + "loss": 0.1698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0703895092010498, + "step": 5795, + "valid_targets_mean": 7015.2, + "valid_targets_min": 4530 + }, + { + "epoch": 6.049034950443401, + "grad_norm": 0.45242065782026375, + "learning_rate": 2.217084906182629e-06, + "loss": 0.1479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08111800253391266, + "step": 5800, + "valid_targets_mean": 6202.6, + "valid_targets_min": 4837 + }, + { + "epoch": 6.054251434533125, + "grad_norm": 0.5317192480222525, + "learning_rate": 2.1933465304510394e-06, + "loss": 0.1504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.047664374113082886, + "step": 5805, + "valid_targets_mean": 1720.9, + "valid_targets_min": 229 + }, + { + "epoch": 6.059467918622849, + "grad_norm": 0.44605770160842584, + "learning_rate": 2.1697285485257245e-06, + "loss": 0.1447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07360129803419113, + "step": 5810, + "valid_targets_mean": 5963.8, + "valid_targets_min": 4216 + }, + { + "epoch": 6.0646844027125715, + "grad_norm": 0.441853350472938, + "learning_rate": 2.1462311200918816e-06, + "loss": 0.1515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07072561979293823, + "step": 5815, + "valid_targets_mean": 5973.9, + "valid_targets_min": 4835 + }, + { + "epoch": 6.069900886802295, + "grad_norm": 0.4188306291698298, + "learning_rate": 2.122854404019601e-06, + "loss": 0.1394, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0653071254491806, + "step": 5820, + "valid_targets_mean": 6669.8, + "valid_targets_min": 4995 + }, + { + "epoch": 6.075117370892019, + "grad_norm": 0.499794469460597, + "learning_rate": 2.0995985583628366e-06, + "loss": 0.1347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08894228935241699, + "step": 5825, + "valid_targets_mean": 5705.5, + "valid_targets_min": 4108 + }, + { + "epoch": 6.080333854981743, + "grad_norm": 0.5126012459941671, + "learning_rate": 2.076463740358299e-06, + "loss": 0.1437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09236709773540497, + "step": 5830, + "valid_targets_mean": 6594.0, + "valid_targets_min": 4527 + }, + { + "epoch": 6.085550339071466, + "grad_norm": 0.5333508343208846, + "learning_rate": 2.053450106424426e-06, + "loss": 0.151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08448384702205658, + "step": 5835, + "valid_targets_mean": 3100.9, + "valid_targets_min": 2104 + }, + { + "epoch": 6.090766823161189, + "grad_norm": 0.43429186203171866, + "learning_rate": 2.030557812160301e-06, + "loss": 0.1339, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06234288960695267, + "step": 5840, + "valid_targets_mean": 5817.8, + "valid_targets_min": 4757 + }, + { + "epoch": 6.095983307250913, + "grad_norm": 0.4887043994253701, + "learning_rate": 2.0077870123446107e-06, + "loss": 0.1331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08603031188249588, + "step": 5845, + "valid_targets_mean": 6333.0, + "valid_targets_min": 5056 + }, + { + "epoch": 6.101199791340637, + "grad_norm": 0.444335254952959, + "learning_rate": 1.985137860934594e-06, + "loss": 0.1362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06609348207712173, + "step": 5850, + "valid_targets_mean": 5956.0, + "valid_targets_min": 2834 + }, + { + "epoch": 6.10641627543036, + "grad_norm": 0.4562419359747214, + "learning_rate": 1.962610511065013e-06, + "loss": 0.1632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10073341429233551, + "step": 5855, + "valid_targets_mean": 6984.6, + "valid_targets_min": 5744 + }, + { + "epoch": 6.1116327595200834, + "grad_norm": 0.4158979212817962, + "learning_rate": 1.940205115047098e-06, + "loss": 0.1449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06610018014907837, + "step": 5860, + "valid_targets_mean": 7290.2, + "valid_targets_min": 5640 + }, + { + "epoch": 6.116849243609807, + "grad_norm": 0.4345197955777367, + "learning_rate": 1.917921824367539e-06, + "loss": 0.141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06808597594499588, + "step": 5865, + "valid_targets_mean": 7036.0, + "valid_targets_min": 5210 + }, + { + "epoch": 6.122065727699531, + "grad_norm": 0.4786142212572958, + "learning_rate": 1.8957607896874419e-06, + "loss": 0.1516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09497830271720886, + "step": 5870, + "valid_targets_mean": 7138.5, + "valid_targets_min": 5292 + }, + { + "epoch": 6.127282211789254, + "grad_norm": 0.4754029749645017, + "learning_rate": 1.8737221608413314e-06, + "loss": 0.1619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08020825684070587, + "step": 5875, + "valid_targets_mean": 6134.2, + "valid_targets_min": 3531 + }, + { + "epoch": 6.132498695878978, + "grad_norm": 0.4595985195497783, + "learning_rate": 1.8518060868361099e-06, + "loss": 0.1785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0816727876663208, + "step": 5880, + "valid_targets_mean": 5968.2, + "valid_targets_min": 4640 + }, + { + "epoch": 6.137715179968701, + "grad_norm": 0.4349894067292051, + "learning_rate": 1.8300127158500714e-06, + "loss": 0.1595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07355540245771408, + "step": 5885, + "valid_targets_mean": 6249.6, + "valid_targets_min": 4328 + }, + { + "epoch": 6.142931664058425, + "grad_norm": 0.4649670581381955, + "learning_rate": 1.8083421952319047e-06, + "loss": 0.1511, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08149757981300354, + "step": 5890, + "valid_targets_mean": 6002.4, + "valid_targets_min": 3123 + }, + { + "epoch": 6.148148148148148, + "grad_norm": 0.47609258117287856, + "learning_rate": 1.786794671499672e-06, + "loss": 0.1481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07623299211263657, + "step": 5895, + "valid_targets_mean": 6372.2, + "valid_targets_min": 4256 + }, + { + "epoch": 6.153364632237872, + "grad_norm": 0.4486882067374972, + "learning_rate": 1.7653702903398384e-06, + "loss": 0.1532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08099064975976944, + "step": 5900, + "valid_targets_mean": 5927.8, + "valid_targets_min": 4753 + }, + { + "epoch": 6.158581116327595, + "grad_norm": 0.43621557139298894, + "learning_rate": 1.7440691966062816e-06, + "loss": 0.1491, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07459859549999237, + "step": 5905, + "valid_targets_mean": 6106.1, + "valid_targets_min": 5370 + }, + { + "epoch": 6.163797600417318, + "grad_norm": 0.49217396559189464, + "learning_rate": 1.722891534319313e-06, + "loss": 0.1546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08426357805728912, + "step": 5910, + "valid_targets_mean": 6240.6, + "valid_targets_min": 4608 + }, + { + "epoch": 6.169014084507042, + "grad_norm": 0.4676130506498011, + "learning_rate": 1.7018374466646981e-06, + "loss": 0.1534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06954306364059448, + "step": 5915, + "valid_targets_mean": 5733.2, + "valid_targets_min": 3801 + }, + { + "epoch": 6.174230568596766, + "grad_norm": 0.44996760565595567, + "learning_rate": 1.6809070759927015e-06, + "loss": 0.1518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07962311804294586, + "step": 5920, + "valid_targets_mean": 6430.1, + "valid_targets_min": 4754 + }, + { + "epoch": 6.1794470526864895, + "grad_norm": 0.363655030101441, + "learning_rate": 1.6601005638171065e-06, + "loss": 0.1306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05828050896525383, + "step": 5925, + "valid_targets_mean": 7375.2, + "valid_targets_min": 6417 + }, + { + "epoch": 6.184663536776213, + "grad_norm": 0.4914962238322863, + "learning_rate": 1.639418050814281e-06, + "loss": 0.1425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08257021009922028, + "step": 5930, + "valid_targets_mean": 6281.0, + "valid_targets_min": 3468 + }, + { + "epoch": 6.189880020865936, + "grad_norm": 0.4732215959721491, + "learning_rate": 1.6188596768221976e-06, + "loss": 0.1547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07489991188049316, + "step": 5935, + "valid_targets_mean": 6532.4, + "valid_targets_min": 5221 + }, + { + "epoch": 6.19509650495566, + "grad_norm": 0.4412663862848313, + "learning_rate": 1.5984255808395198e-06, + "loss": 0.1512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06268274039030075, + "step": 5940, + "valid_targets_mean": 5552.6, + "valid_targets_min": 4438 + }, + { + "epoch": 6.200312989045384, + "grad_norm": 0.4421749860239771, + "learning_rate": 1.5781159010246306e-06, + "loss": 0.1475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07922811806201935, + "step": 5945, + "valid_targets_mean": 5878.1, + "valid_targets_min": 3915 + }, + { + "epoch": 6.2055294731351065, + "grad_norm": 0.37366490916733003, + "learning_rate": 1.557930774694727e-06, + "loss": 0.1424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.059279974550008774, + "step": 5950, + "valid_targets_mean": 7082.6, + "valid_targets_min": 4135 + }, + { + "epoch": 6.21074595722483, + "grad_norm": 0.4584975326954793, + "learning_rate": 1.5378703383248694e-06, + "loss": 0.143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07578883320093155, + "step": 5955, + "valid_targets_mean": 5936.4, + "valid_targets_min": 3947 + }, + { + "epoch": 6.215962441314554, + "grad_norm": 0.4463332155023387, + "learning_rate": 1.5179347275470812e-06, + "loss": 0.1482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0972755178809166, + "step": 5960, + "valid_targets_mean": 6836.2, + "valid_targets_min": 4865 + }, + { + "epoch": 6.221178925404278, + "grad_norm": 0.4268664185766591, + "learning_rate": 1.4981240771494032e-06, + "loss": 0.15, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07237567007541656, + "step": 5965, + "valid_targets_mean": 6539.4, + "valid_targets_min": 4643 + }, + { + "epoch": 6.2263954094940015, + "grad_norm": 0.41791385505125306, + "learning_rate": 1.4784385210750052e-06, + "loss": 0.1413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07602772116661072, + "step": 5970, + "valid_targets_mean": 6613.6, + "valid_targets_min": 4959 + }, + { + "epoch": 6.231611893583724, + "grad_norm": 0.4730636514971436, + "learning_rate": 1.458878192421278e-06, + "loss": 0.1606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0815482810139656, + "step": 5975, + "valid_targets_mean": 6044.1, + "valid_targets_min": 4652 + }, + { + "epoch": 6.236828377673448, + "grad_norm": 0.4502607924810824, + "learning_rate": 1.4394432234389167e-06, + "loss": 0.1326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06586840748786926, + "step": 5980, + "valid_targets_mean": 5893.9, + "valid_targets_min": 4938 + }, + { + "epoch": 6.242044861763172, + "grad_norm": 0.4124629451583863, + "learning_rate": 1.4201337455310537e-06, + "loss": 0.1279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05999205261468887, + "step": 5985, + "valid_targets_mean": 5849.6, + "valid_targets_min": 4546 + }, + { + "epoch": 6.247261345852895, + "grad_norm": 0.4302055888904862, + "learning_rate": 1.4009498892523388e-06, + "loss": 0.1305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.067840076982975, + "step": 5990, + "valid_targets_mean": 5751.5, + "valid_targets_min": 3200 + }, + { + "epoch": 6.2524778299426185, + "grad_norm": 0.4422925974924787, + "learning_rate": 1.3818917843080848e-06, + "loss": 0.1365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07369904220104218, + "step": 5995, + "valid_targets_mean": 6149.6, + "valid_targets_min": 5010 + }, + { + "epoch": 6.257694314032342, + "grad_norm": 0.47404703219712485, + "learning_rate": 1.3629595595533673e-06, + "loss": 0.1538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08218543231487274, + "step": 6000, + "valid_targets_mean": 6551.2, + "valid_targets_min": 4904 + }, + { + "epoch": 6.262910798122066, + "grad_norm": 0.505915566634165, + "learning_rate": 1.3441533429921804e-06, + "loss": 0.1739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10287123173475266, + "step": 6005, + "valid_targets_mean": 5728.9, + "valid_targets_min": 4812 + }, + { + "epoch": 6.26812728221179, + "grad_norm": 0.4658189069176967, + "learning_rate": 1.3254732617765375e-06, + "loss": 0.1538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06064922735095024, + "step": 6010, + "valid_targets_mean": 5047.9, + "valid_targets_min": 3629 + }, + { + "epoch": 6.273343766301513, + "grad_norm": 0.45361843335466556, + "learning_rate": 1.3069194422056454e-06, + "loss": 0.1429, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07493551075458527, + "step": 6015, + "valid_targets_mean": 6260.0, + "valid_targets_min": 4324 + }, + { + "epoch": 6.278560250391236, + "grad_norm": 0.41985560827761914, + "learning_rate": 1.2884920097250197e-06, + "loss": 0.1445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06478630006313324, + "step": 6020, + "valid_targets_mean": 7853.1, + "valid_targets_min": 4920 + }, + { + "epoch": 6.28377673448096, + "grad_norm": 0.45657367841564317, + "learning_rate": 1.2701910889256651e-06, + "loss": 0.1572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0703677386045456, + "step": 6025, + "valid_targets_mean": 6231.0, + "valid_targets_min": 4882 + }, + { + "epoch": 6.288993218570683, + "grad_norm": 0.4774675411031373, + "learning_rate": 1.2520168035432102e-06, + "loss": 0.1598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07129646837711334, + "step": 6030, + "valid_targets_mean": 7472.9, + "valid_targets_min": 3943 + }, + { + "epoch": 6.294209702660407, + "grad_norm": 0.4371996079692814, + "learning_rate": 1.2339692764570853e-06, + "loss": 0.1523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07915046811103821, + "step": 6035, + "valid_targets_mean": 6840.2, + "valid_targets_min": 5034 + }, + { + "epoch": 6.29942618675013, + "grad_norm": 0.443904155833791, + "learning_rate": 1.2160486296896834e-06, + "loss": 0.1496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07795794308185577, + "step": 6040, + "valid_targets_mean": 6458.5, + "valid_targets_min": 4461 + }, + { + "epoch": 6.304642670839854, + "grad_norm": 0.41490694962167574, + "learning_rate": 1.198254984405538e-06, + "loss": 0.1405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06944327056407928, + "step": 6045, + "valid_targets_mean": 6645.4, + "valid_targets_min": 5188 + }, + { + "epoch": 6.309859154929577, + "grad_norm": 0.419908285559951, + "learning_rate": 1.1805884609105012e-06, + "loss": 0.1409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08055192232131958, + "step": 6050, + "valid_targets_mean": 6002.9, + "valid_targets_min": 5009 + }, + { + "epoch": 6.315075639019301, + "grad_norm": 0.4363411948940314, + "learning_rate": 1.1630491786509457e-06, + "loss": 0.1945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08345134556293488, + "step": 6055, + "valid_targets_mean": 6025.9, + "valid_targets_min": 4551 + }, + { + "epoch": 6.3202921231090246, + "grad_norm": 0.40391010865392174, + "learning_rate": 1.1456372562129281e-06, + "loss": 0.1398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0750681534409523, + "step": 6060, + "valid_targets_mean": 7210.0, + "valid_targets_min": 4492 + }, + { + "epoch": 6.325508607198748, + "grad_norm": 0.4331230667589545, + "learning_rate": 1.1283528113214114e-06, + "loss": 0.142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07180407643318176, + "step": 6065, + "valid_targets_mean": 7051.4, + "valid_targets_min": 4718 + }, + { + "epoch": 6.330725091288471, + "grad_norm": 0.4688546231171162, + "learning_rate": 1.1111959608394662e-06, + "loss": 0.1496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07835313677787781, + "step": 6070, + "valid_targets_mean": 6686.0, + "valid_targets_min": 4827 + }, + { + "epoch": 6.335941575378195, + "grad_norm": 0.4222557657355432, + "learning_rate": 1.094166820767464e-06, + "loss": 0.148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0590282641351223, + "step": 6075, + "valid_targets_mean": 5311.9, + "valid_targets_min": 3828 + }, + { + "epoch": 6.341158059467919, + "grad_norm": 0.4592548831164615, + "learning_rate": 1.0772655062423176e-06, + "loss": 0.1295, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06287582963705063, + "step": 6080, + "valid_targets_mean": 7078.9, + "valid_targets_min": 4594 + }, + { + "epoch": 6.346374543557642, + "grad_norm": 0.4366862711701498, + "learning_rate": 1.0604921315366767e-06, + "loss": 0.1453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0600103922188282, + "step": 6085, + "valid_targets_mean": 6393.8, + "valid_targets_min": 4509 + }, + { + "epoch": 6.351591027647365, + "grad_norm": 0.4385547681672465, + "learning_rate": 1.0438468100581823e-06, + "loss": 0.1346, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08215263485908508, + "step": 6090, + "valid_targets_mean": 7848.9, + "valid_targets_min": 5649 + }, + { + "epoch": 6.356807511737089, + "grad_norm": 0.39338443863583267, + "learning_rate": 1.027329654348672e-06, + "loss": 0.1349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06234607845544815, + "step": 6095, + "valid_targets_mean": 6623.4, + "valid_targets_min": 4343 + }, + { + "epoch": 6.362023995826813, + "grad_norm": 0.6157520375022761, + "learning_rate": 1.01094077608344e-06, + "loss": 0.141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06733613461256027, + "step": 6100, + "valid_targets_mean": 6255.9, + "valid_targets_min": 4660 + }, + { + "epoch": 6.3672404799165365, + "grad_norm": 0.3929343182875014, + "learning_rate": 9.946802860704706e-07, + "loss": 0.1447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07071949541568756, + "step": 6105, + "valid_targets_mean": 8243.0, + "valid_targets_min": 6281 + }, + { + "epoch": 6.372456964006259, + "grad_norm": 0.39077256059337795, + "learning_rate": 9.785482942497037e-07, + "loss": 0.1348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07058300822973251, + "step": 6110, + "valid_targets_mean": 7083.5, + "valid_targets_min": 4657 + }, + { + "epoch": 6.377673448095983, + "grad_norm": 0.616043775718402, + "learning_rate": 9.625449096922667e-07, + "loss": 0.1453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08512598276138306, + "step": 6115, + "valid_targets_mean": 6399.8, + "valid_targets_min": 5098 + }, + { + "epoch": 6.382889932185707, + "grad_norm": 0.6517067294303951, + "learning_rate": 9.466702405997674e-07, + "loss": 0.1483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0778372511267662, + "step": 6120, + "valid_targets_mean": 5257.4, + "valid_targets_min": 1879 + }, + { + "epoch": 6.388106416275431, + "grad_norm": 0.5061808438707962, + "learning_rate": 9.309243943035295e-07, + "loss": 0.1517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08713942766189575, + "step": 6125, + "valid_targets_mean": 5260.5, + "valid_targets_min": 3619 + }, + { + "epoch": 6.3933229003651535, + "grad_norm": 0.7113799433457525, + "learning_rate": 9.153074772638937e-07, + "loss": 0.2047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11782443523406982, + "step": 6130, + "valid_targets_mean": 4215.5, + "valid_targets_min": 1142 + }, + { + "epoch": 6.398539384454877, + "grad_norm": 0.629588468325526, + "learning_rate": 8.99819595069491e-07, + "loss": 0.2038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09832511842250824, + "step": 6135, + "valid_targets_mean": 4934.0, + "valid_targets_min": 1799 + }, + { + "epoch": 6.403755868544601, + "grad_norm": 0.5960635962292052, + "learning_rate": 8.844608524365172e-07, + "loss": 0.2008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10664057731628418, + "step": 6140, + "valid_targets_mean": 5817.9, + "valid_targets_min": 3015 + }, + { + "epoch": 6.408972352634325, + "grad_norm": 0.5365596850498139, + "learning_rate": 8.692313532080443e-07, + "loss": 0.1994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10521996021270752, + "step": 6145, + "valid_targets_mean": 6296.1, + "valid_targets_min": 1681 + }, + { + "epoch": 6.414188836724048, + "grad_norm": 0.6592033302273491, + "learning_rate": 8.54131200353292e-07, + "loss": 0.2109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0958816334605217, + "step": 6150, + "valid_targets_mean": 3804.1, + "valid_targets_min": 1374 + }, + { + "epoch": 6.419405320813771, + "grad_norm": 0.6844904376171532, + "learning_rate": 8.391604959669619e-07, + "loss": 0.2106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10067593306303024, + "step": 6155, + "valid_targets_mean": 3971.6, + "valid_targets_min": 1951 + }, + { + "epoch": 6.424621804903495, + "grad_norm": 0.5519922783144792, + "learning_rate": 8.243193412685246e-07, + "loss": 0.205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09023644030094147, + "step": 6160, + "valid_targets_mean": 5107.8, + "valid_targets_min": 2042 + }, + { + "epoch": 6.429838288993219, + "grad_norm": 0.579454792905566, + "learning_rate": 8.096078366015514e-07, + "loss": 0.1979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08574582636356354, + "step": 6165, + "valid_targets_mean": 5041.9, + "valid_targets_min": 1831 + }, + { + "epoch": 6.435054773082942, + "grad_norm": 0.6273873397791723, + "learning_rate": 7.950260814330169e-07, + "loss": 0.2106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12119754403829575, + "step": 6170, + "valid_targets_mean": 4604.1, + "valid_targets_min": 2077 + }, + { + "epoch": 6.4402712571726655, + "grad_norm": 0.5490469659660709, + "learning_rate": 7.805741743526441e-07, + "loss": 0.2041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09245245903730392, + "step": 6175, + "valid_targets_mean": 4612.0, + "valid_targets_min": 2022 + }, + { + "epoch": 6.445487741262389, + "grad_norm": 0.6277693459407978, + "learning_rate": 7.662522130722294e-07, + "loss": 0.2055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12131471931934357, + "step": 6180, + "valid_targets_mean": 5350.1, + "valid_targets_min": 1499 + }, + { + "epoch": 6.450704225352113, + "grad_norm": 0.5320121773754007, + "learning_rate": 7.520602944249855e-07, + "loss": 0.1994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09855857491493225, + "step": 6185, + "valid_targets_mean": 4540.5, + "valid_targets_min": 1013 + }, + { + "epoch": 6.455920709441836, + "grad_norm": 0.628701080827187, + "learning_rate": 7.379985143648815e-07, + "loss": 0.2054, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10051758587360382, + "step": 6190, + "valid_targets_mean": 4368.1, + "valid_targets_min": 1939 + }, + { + "epoch": 6.46113719353156, + "grad_norm": 0.6482761545771282, + "learning_rate": 7.240669679660017e-07, + "loss": 0.196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11205872893333435, + "step": 6195, + "valid_targets_mean": 3930.9, + "valid_targets_min": 1820 + }, + { + "epoch": 6.466353677621283, + "grad_norm": 0.6825368440361551, + "learning_rate": 7.102657494218879e-07, + "loss": 0.1998, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1051061749458313, + "step": 6200, + "valid_targets_mean": 3865.8, + "valid_targets_min": 1589 + }, + { + "epoch": 6.471570161711007, + "grad_norm": 0.6184419968599022, + "learning_rate": 6.965949520449311e-07, + "loss": 0.1939, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09660804271697998, + "step": 6205, + "valid_targets_mean": 4365.5, + "valid_targets_min": 2668 + }, + { + "epoch": 6.47678664580073, + "grad_norm": 0.6619033691172838, + "learning_rate": 6.830546682657035e-07, + "loss": 0.1958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09968802332878113, + "step": 6210, + "valid_targets_mean": 3969.9, + "valid_targets_min": 1318 + }, + { + "epoch": 6.482003129890454, + "grad_norm": 0.6779926642410552, + "learning_rate": 6.696449896323698e-07, + "loss": 0.2084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09951610863208771, + "step": 6215, + "valid_targets_mean": 3546.0, + "valid_targets_min": 2058 + }, + { + "epoch": 6.487219613980177, + "grad_norm": 0.6811512719960557, + "learning_rate": 6.563660068100408e-07, + "loss": 0.1999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08553477376699448, + "step": 6220, + "valid_targets_mean": 2989.2, + "valid_targets_min": 1128 + }, + { + "epoch": 6.492436098069901, + "grad_norm": 0.6596745245246597, + "learning_rate": 6.432178095801678e-07, + "loss": 0.1941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09903447329998016, + "step": 6225, + "valid_targets_mean": 3647.8, + "valid_targets_min": 2001 + }, + { + "epoch": 6.497652582159624, + "grad_norm": 0.6482576165469474, + "learning_rate": 6.302004868399514e-07, + "loss": 0.1978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09738115966320038, + "step": 6230, + "valid_targets_mean": 4171.5, + "valid_targets_min": 1267 + }, + { + "epoch": 6.502869066249348, + "grad_norm": 0.6625835126975278, + "learning_rate": 6.173141266017113e-07, + "loss": 0.1894, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0937391147017479, + "step": 6235, + "valid_targets_mean": 4544.0, + "valid_targets_min": 3111 + }, + { + "epoch": 6.5080855503390715, + "grad_norm": 0.7324370486504399, + "learning_rate": 6.045588159923266e-07, + "loss": 0.1828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09694460779428482, + "step": 6240, + "valid_targets_mean": 2999.6, + "valid_targets_min": 1403 + }, + { + "epoch": 6.513302034428795, + "grad_norm": 0.6727313820048448, + "learning_rate": 5.919346412526117e-07, + "loss": 0.1936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11848916113376617, + "step": 6245, + "valid_targets_mean": 4133.2, + "valid_targets_min": 1588 + }, + { + "epoch": 6.518518518518518, + "grad_norm": 0.7927474005995049, + "learning_rate": 5.794416877367526e-07, + "loss": 0.1979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09734158962965012, + "step": 6250, + "valid_targets_mean": 3115.0, + "valid_targets_min": 1363 + }, + { + "epoch": 6.523735002608242, + "grad_norm": 0.5707061122942222, + "learning_rate": 5.670800399117316e-07, + "loss": 0.1939, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07926693558692932, + "step": 6255, + "valid_targets_mean": 4001.2, + "valid_targets_min": 1631 + }, + { + "epoch": 6.528951486697966, + "grad_norm": 0.6399717041914177, + "learning_rate": 5.548497813567455e-07, + "loss": 0.1933, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12379235029220581, + "step": 6260, + "valid_targets_mean": 5059.0, + "valid_targets_min": 3090 + }, + { + "epoch": 6.534167970787689, + "grad_norm": 0.7183321713596021, + "learning_rate": 5.427509947626486e-07, + "loss": 0.187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11741982400417328, + "step": 6265, + "valid_targets_mean": 3915.0, + "valid_targets_min": 1802 + }, + { + "epoch": 6.539384454877412, + "grad_norm": 0.7546929362915107, + "learning_rate": 5.307837619313949e-07, + "loss": 0.1941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11509503424167633, + "step": 6270, + "valid_targets_mean": 3672.2, + "valid_targets_min": 1685 + }, + { + "epoch": 6.544600938967136, + "grad_norm": 0.6316468719956794, + "learning_rate": 5.189481637754679e-07, + "loss": 0.184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09108906984329224, + "step": 6275, + "valid_targets_mean": 4007.2, + "valid_targets_min": 2292 + }, + { + "epoch": 6.54981742305686, + "grad_norm": 0.66687112779914, + "learning_rate": 5.072442803173649e-07, + "loss": 0.188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10096663236618042, + "step": 6280, + "valid_targets_mean": 4008.5, + "valid_targets_min": 2060 + }, + { + "epoch": 6.5550339071465835, + "grad_norm": 0.7087884026043622, + "learning_rate": 4.956721906890228e-07, + "loss": 0.1883, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09432883560657501, + "step": 6285, + "valid_targets_mean": 3193.8, + "valid_targets_min": 1458 + }, + { + "epoch": 6.560250391236306, + "grad_norm": 0.697748218608281, + "learning_rate": 4.842319731313016e-07, + "loss": 0.1927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09759856015443802, + "step": 6290, + "valid_targets_mean": 4109.6, + "valid_targets_min": 1512 + }, + { + "epoch": 6.56546687532603, + "grad_norm": 0.7362880864851357, + "learning_rate": 4.729237049934621e-07, + "loss": 0.1849, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08084060996770859, + "step": 6295, + "valid_targets_mean": 2608.5, + "valid_targets_min": 1480 + }, + { + "epoch": 6.570683359415754, + "grad_norm": 0.7238810408864689, + "learning_rate": 4.6174746273261793e-07, + "loss": 0.1938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09399285912513733, + "step": 6300, + "valid_targets_mean": 3886.5, + "valid_targets_min": 1229 + }, + { + "epoch": 6.575899843505478, + "grad_norm": 0.7036850846133101, + "learning_rate": 4.507033219132395e-07, + "loss": 0.1961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09871475398540497, + "step": 6305, + "valid_targets_mean": 3347.1, + "valid_targets_min": 1276 + }, + { + "epoch": 6.5811163275952005, + "grad_norm": 0.6299069138421014, + "learning_rate": 4.3979135720664035e-07, + "loss": 0.1878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08941268175840378, + "step": 6310, + "valid_targets_mean": 3769.8, + "valid_targets_min": 1357 + }, + { + "epoch": 6.586332811684924, + "grad_norm": 0.6928078753717489, + "learning_rate": 4.2901164239046443e-07, + "loss": 0.1851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09616914391517639, + "step": 6315, + "valid_targets_mean": 3142.2, + "valid_targets_min": 924 + }, + { + "epoch": 6.591549295774648, + "grad_norm": 0.6610805651271812, + "learning_rate": 4.1836425034819106e-07, + "loss": 0.2041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1008647009730339, + "step": 6320, + "valid_targets_mean": 4241.0, + "valid_targets_min": 2089 + }, + { + "epoch": 6.596765779864372, + "grad_norm": 0.6671949023526996, + "learning_rate": 4.078492530686506e-07, + "loss": 0.1916, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09079846739768982, + "step": 6325, + "valid_targets_mean": 3015.9, + "valid_targets_min": 1643 + }, + { + "epoch": 6.601982263954095, + "grad_norm": 0.6229292016801433, + "learning_rate": 3.9746672164551856e-07, + "loss": 0.1832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09595387428998947, + "step": 6330, + "valid_targets_mean": 4017.5, + "valid_targets_min": 1244 + }, + { + "epoch": 6.607198748043818, + "grad_norm": 0.6651029746270444, + "learning_rate": 3.872167262768578e-07, + "loss": 0.1992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09952102601528168, + "step": 6335, + "valid_targets_mean": 4399.2, + "valid_targets_min": 730 + }, + { + "epoch": 6.612415232133542, + "grad_norm": 0.6757801230902546, + "learning_rate": 3.7709933626461916e-07, + "loss": 0.1923, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07291614264249802, + "step": 6340, + "valid_targets_mean": 2727.6, + "valid_targets_min": 977 + }, + { + "epoch": 6.617631716223266, + "grad_norm": 0.69492026054824, + "learning_rate": 3.671146200142017e-07, + "loss": 0.1911, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10746291279792786, + "step": 6345, + "valid_targets_mean": 4673.0, + "valid_targets_min": 2091 + }, + { + "epoch": 6.622848200312989, + "grad_norm": 0.6774812431202281, + "learning_rate": 3.5726264503396e-07, + "loss": 0.1793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08594183623790741, + "step": 6350, + "valid_targets_mean": 3184.4, + "valid_targets_min": 1798 + }, + { + "epoch": 6.6280646844027125, + "grad_norm": 0.6756115160563645, + "learning_rate": 3.475434779347731e-07, + "loss": 0.2011, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09412597119808197, + "step": 6355, + "valid_targets_mean": 3983.6, + "valid_targets_min": 1432 + }, + { + "epoch": 6.633281168492436, + "grad_norm": 0.6174532077675494, + "learning_rate": 3.3795718442957593e-07, + "loss": 0.1865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08559626340866089, + "step": 6360, + "valid_targets_mean": 3666.4, + "valid_targets_min": 1261 + }, + { + "epoch": 6.63849765258216, + "grad_norm": 0.7038464360792298, + "learning_rate": 3.2850382933292903e-07, + "loss": 0.1941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11896900832653046, + "step": 6365, + "valid_targets_mean": 4252.8, + "valid_targets_min": 2737 + }, + { + "epoch": 6.643714136671883, + "grad_norm": 0.6533877247148979, + "learning_rate": 3.1918347656056946e-07, + "loss": 0.1882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08064286410808563, + "step": 6370, + "valid_targets_mean": 3150.9, + "valid_targets_min": 1219 + }, + { + "epoch": 6.648930620761607, + "grad_norm": 0.788320538799848, + "learning_rate": 3.0999618912898267e-07, + "loss": 0.1982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08878035098314285, + "step": 6375, + "valid_targets_mean": 3408.0, + "valid_targets_min": 2058 + }, + { + "epoch": 6.65414710485133, + "grad_norm": 0.7359408241178381, + "learning_rate": 3.009420291549825e-07, + "loss": 0.1854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08388732373714447, + "step": 6380, + "valid_targets_mean": 2731.5, + "valid_targets_min": 916 + }, + { + "epoch": 6.659363588941054, + "grad_norm": 0.6629496925157677, + "learning_rate": 2.9202105785527623e-07, + "loss": 0.1989, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08356083929538727, + "step": 6385, + "valid_targets_mean": 3799.5, + "valid_targets_min": 1302 + }, + { + "epoch": 6.664580073030777, + "grad_norm": 0.6940667760520753, + "learning_rate": 2.8323333554606923e-07, + "loss": 0.1846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11537809669971466, + "step": 6390, + "valid_targets_mean": 3893.1, + "valid_targets_min": 1500 + }, + { + "epoch": 6.669796557120501, + "grad_norm": 0.6674810581856995, + "learning_rate": 2.745789216426387e-07, + "loss": 0.1826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11355355381965637, + "step": 6395, + "valid_targets_mean": 4703.6, + "valid_targets_min": 2803 + }, + { + "epoch": 6.675013041210224, + "grad_norm": 0.6124856590850242, + "learning_rate": 2.660578746589515e-07, + "loss": 0.1748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08361378312110901, + "step": 6400, + "valid_targets_mean": 3884.5, + "valid_targets_min": 878 + }, + { + "epoch": 6.680229525299948, + "grad_norm": 0.6516586157992343, + "learning_rate": 2.5767025220724717e-07, + "loss": 0.1991, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09938517212867737, + "step": 6405, + "valid_targets_mean": 3935.0, + "valid_targets_min": 2662 + }, + { + "epoch": 6.685446009389671, + "grad_norm": 0.8830610653548544, + "learning_rate": 2.4941611099766896e-07, + "loss": 0.1886, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08714678883552551, + "step": 6410, + "valid_targets_mean": 3903.6, + "valid_targets_min": 1086 + }, + { + "epoch": 6.690662493479395, + "grad_norm": 0.6373458088158991, + "learning_rate": 2.4129550683786194e-07, + "loss": 0.1959, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10533812642097473, + "step": 6415, + "valid_targets_mean": 5394.1, + "valid_targets_min": 1566 + }, + { + "epoch": 6.6958789775691185, + "grad_norm": 0.6634021979640021, + "learning_rate": 2.3330849463261363e-07, + "loss": 0.1908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11396835744380951, + "step": 6420, + "valid_targets_mean": 3978.9, + "valid_targets_min": 2504 + }, + { + "epoch": 6.701095461658842, + "grad_norm": 0.7370243247872715, + "learning_rate": 2.2545512838346716e-07, + "loss": 0.1963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11917287111282349, + "step": 6425, + "valid_targets_mean": 4021.4, + "valid_targets_min": 1548 + }, + { + "epoch": 6.706311945748565, + "grad_norm": 0.7020168539085931, + "learning_rate": 2.1773546118836418e-07, + "loss": 0.1905, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08278389275074005, + "step": 6430, + "valid_targets_mean": 2818.6, + "valid_targets_min": 1352 + }, + { + "epoch": 6.711528429838289, + "grad_norm": 0.6096392395244696, + "learning_rate": 2.1014954524128493e-07, + "loss": 0.1873, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09961653500795364, + "step": 6435, + "valid_targets_mean": 5038.8, + "valid_targets_min": 2339 + }, + { + "epoch": 6.716744913928013, + "grad_norm": 0.6113839377022464, + "learning_rate": 2.0269743183189528e-07, + "loss": 0.1856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09155851602554321, + "step": 6440, + "valid_targets_mean": 4025.4, + "valid_targets_min": 2015 + }, + { + "epoch": 6.721961398017736, + "grad_norm": 0.64667000460593, + "learning_rate": 1.9537917134520023e-07, + "loss": 0.1817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09636899828910828, + "step": 6445, + "valid_targets_mean": 4755.2, + "valid_targets_min": 1731 + }, + { + "epoch": 6.727177882107459, + "grad_norm": 0.7435022540058803, + "learning_rate": 1.881948132611977e-07, + "loss": 0.1782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07450690865516663, + "step": 6450, + "valid_targets_mean": 2545.1, + "valid_targets_min": 1460 + }, + { + "epoch": 6.732394366197183, + "grad_norm": 0.6966622291013195, + "learning_rate": 1.811444061545542e-07, + "loss": 0.1785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1022343635559082, + "step": 6455, + "valid_targets_mean": 4214.9, + "valid_targets_min": 1282 + }, + { + "epoch": 6.737610850286907, + "grad_norm": 0.7462983581900599, + "learning_rate": 1.7422799769426736e-07, + "loss": 0.172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0818384513258934, + "step": 6460, + "valid_targets_mean": 3325.1, + "valid_targets_min": 1421 + }, + { + "epoch": 6.7428273343766305, + "grad_norm": 0.6710395765387892, + "learning_rate": 1.674456346433506e-07, + "loss": 0.1757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10047239810228348, + "step": 6465, + "valid_targets_mean": 4091.0, + "valid_targets_min": 1764 + }, + { + "epoch": 6.748043818466353, + "grad_norm": 0.7514266145567918, + "learning_rate": 1.607973628585091e-07, + "loss": 0.1975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1063307598233223, + "step": 6470, + "valid_targets_mean": 2931.1, + "valid_targets_min": 1175 + }, + { + "epoch": 6.753260302556077, + "grad_norm": 0.6163487283958144, + "learning_rate": 1.542832272898398e-07, + "loss": 0.1913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08855152875185013, + "step": 6475, + "valid_targets_mean": 4426.1, + "valid_targets_min": 2546 + }, + { + "epoch": 6.758476786645801, + "grad_norm": 0.817035145037278, + "learning_rate": 1.4790327198051624e-07, + "loss": 0.1843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11842495203018188, + "step": 6480, + "valid_targets_mean": 4051.2, + "valid_targets_min": 1244 + }, + { + "epoch": 6.763693270735525, + "grad_norm": 0.8749185172353121, + "learning_rate": 1.416575400664999e-07, + "loss": 0.193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10528464615345001, + "step": 6485, + "valid_targets_mean": 4871.5, + "valid_targets_min": 3056 + }, + { + "epoch": 6.7689097548252475, + "grad_norm": 0.6987069828977293, + "learning_rate": 1.3554607377624263e-07, + "loss": 0.1845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09647294878959656, + "step": 6490, + "valid_targets_mean": 3943.5, + "valid_targets_min": 1948 + }, + { + "epoch": 6.774126238914971, + "grad_norm": 0.6101994762804832, + "learning_rate": 1.295689144304091e-07, + "loss": 0.1814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08448642492294312, + "step": 6495, + "valid_targets_mean": 4304.1, + "valid_targets_min": 1558 + }, + { + "epoch": 6.779342723004695, + "grad_norm": 0.6158366334694998, + "learning_rate": 1.237261024415881e-07, + "loss": 0.1837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0801931768655777, + "step": 6500, + "valid_targets_mean": 4116.9, + "valid_targets_min": 1145 + }, + { + "epoch": 6.784559207094419, + "grad_norm": 0.6857586141694774, + "learning_rate": 1.1801767731401958e-07, + "loss": 0.1862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10176026821136475, + "step": 6505, + "valid_targets_mean": 3832.9, + "valid_targets_min": 1428 + }, + { + "epoch": 6.789775691184142, + "grad_norm": 0.7057376208408004, + "learning_rate": 1.1244367764333464e-07, + "loss": 0.1836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10797597467899323, + "step": 6510, + "valid_targets_mean": 4549.4, + "valid_targets_min": 1708 + }, + { + "epoch": 6.794992175273865, + "grad_norm": 0.6832621171631214, + "learning_rate": 1.0700414111629365e-07, + "loss": 0.1797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07267992198467255, + "step": 6515, + "valid_targets_mean": 3170.9, + "valid_targets_min": 1301 + }, + { + "epoch": 6.800208659363589, + "grad_norm": 0.7077200621070784, + "learning_rate": 1.0169910451052422e-07, + "loss": 0.1962, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09386198967695236, + "step": 6520, + "valid_targets_mean": 3377.1, + "valid_targets_min": 1971 + }, + { + "epoch": 6.805425143453313, + "grad_norm": 0.6397847483161193, + "learning_rate": 9.652860369428141e-08, + "loss": 0.1974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10848961025476456, + "step": 6525, + "valid_targets_mean": 4398.6, + "valid_targets_min": 1054 + }, + { + "epoch": 6.810641627543036, + "grad_norm": 0.7803177160065538, + "learning_rate": 9.149267362619895e-08, + "loss": 0.1923, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11172320693731308, + "step": 6530, + "valid_targets_mean": 4018.1, + "valid_targets_min": 1612 + }, + { + "epoch": 6.8158581116327595, + "grad_norm": 0.650002808426002, + "learning_rate": 8.659134835504956e-08, + "loss": 0.2001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07572606205940247, + "step": 6535, + "valid_targets_mean": 3404.1, + "valid_targets_min": 721 + }, + { + "epoch": 6.821074595722483, + "grad_norm": 0.6893904339867692, + "learning_rate": 8.18246610195339e-08, + "loss": 0.1904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09537730365991592, + "step": 6540, + "valid_targets_mean": 4015.5, + "valid_targets_min": 1625 + }, + { + "epoch": 6.826291079812207, + "grad_norm": 0.6424645442825622, + "learning_rate": 7.719264384802527e-08, + "loss": 0.2042, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09576665610074997, + "step": 6545, + "valid_targets_mean": 4242.8, + "valid_targets_min": 1381 + }, + { + "epoch": 6.83150756390193, + "grad_norm": 0.712156462106448, + "learning_rate": 7.269532815838532e-08, + "loss": 0.2007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09769143909215927, + "step": 6550, + "valid_targets_mean": 3441.9, + "valid_targets_min": 998 + }, + { + "epoch": 6.836724047991654, + "grad_norm": 0.6827128971902778, + "learning_rate": 6.833274435772196e-08, + "loss": 0.1914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08550089597702026, + "step": 6555, + "valid_targets_mean": 3983.0, + "valid_targets_min": 1186 + }, + { + "epoch": 6.841940532081377, + "grad_norm": 0.7148980975516651, + "learning_rate": 6.410492194220963e-08, + "loss": 0.1929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11428973823785782, + "step": 6560, + "valid_targets_mean": 3655.4, + "valid_targets_min": 1195 + }, + { + "epoch": 6.847157016171101, + "grad_norm": 0.7278319014979708, + "learning_rate": 6.001188949687153e-08, + "loss": 0.1907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09005587548017502, + "step": 6565, + "valid_targets_mean": 2551.0, + "valid_targets_min": 885 + }, + { + "epoch": 6.852373500260824, + "grad_norm": 0.7098604794810907, + "learning_rate": 5.6053674695395463e-08, + "loss": 0.1942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08953683078289032, + "step": 6570, + "valid_targets_mean": 3153.6, + "valid_targets_min": 814 + }, + { + "epoch": 6.857589984350548, + "grad_norm": 0.6707953650336487, + "learning_rate": 5.22303042999428e-08, + "loss": 0.1936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08640475571155548, + "step": 6575, + "valid_targets_mean": 3468.2, + "valid_targets_min": 2174 + }, + { + "epoch": 6.862806468440271, + "grad_norm": 0.7020287207089069, + "learning_rate": 4.8541804160968655e-08, + "loss": 0.1799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10647602379322052, + "step": 6580, + "valid_targets_mean": 5480.9, + "valid_targets_min": 3025 + }, + { + "epoch": 6.868022952529994, + "grad_norm": 0.7378779836306224, + "learning_rate": 4.498819921705089e-08, + "loss": 0.1923, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09601832181215286, + "step": 6585, + "valid_targets_mean": 3307.9, + "valid_targets_min": 1434 + }, + { + "epoch": 6.873239436619718, + "grad_norm": 0.7140400104364611, + "learning_rate": 4.15695134947125e-08, + "loss": 0.1817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09141425788402557, + "step": 6590, + "valid_targets_mean": 3271.1, + "valid_targets_min": 1074 + }, + { + "epoch": 6.878455920709442, + "grad_norm": 0.6722357710881463, + "learning_rate": 3.8285770108272836e-08, + "loss": 0.1975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08441444486379623, + "step": 6595, + "valid_targets_mean": 3626.1, + "valid_targets_min": 1510 + }, + { + "epoch": 6.8836724047991655, + "grad_norm": 0.659968812083692, + "learning_rate": 3.5136991259672183e-08, + "loss": 0.1922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09881377965211868, + "step": 6600, + "valid_targets_mean": 4041.6, + "valid_targets_min": 1885 + }, + { + "epoch": 6.888888888888889, + "grad_norm": 0.668962747287096, + "learning_rate": 3.212319823834298e-08, + "loss": 0.1966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09592445194721222, + "step": 6605, + "valid_targets_mean": 5077.0, + "valid_targets_min": 3139 + }, + { + "epoch": 6.894105372978612, + "grad_norm": 0.7017510481073289, + "learning_rate": 2.9244411421049946e-08, + "loss": 0.1904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1116001158952713, + "step": 6610, + "valid_targets_mean": 4221.8, + "valid_targets_min": 2598 + }, + { + "epoch": 6.899321857068336, + "grad_norm": 0.9649150924907022, + "learning_rate": 2.6500650271759077e-08, + "loss": 0.1933, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08478601276874542, + "step": 6615, + "valid_targets_mean": 3226.0, + "valid_targets_min": 1460 + }, + { + "epoch": 6.90453834115806, + "grad_norm": 0.6978675004253714, + "learning_rate": 2.3891933341506636e-08, + "loss": 0.1954, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10948053747415543, + "step": 6620, + "valid_targets_mean": 4045.0, + "valid_targets_min": 1084 + }, + { + "epoch": 6.9097548252477825, + "grad_norm": 0.657117902164971, + "learning_rate": 2.1418278268270365e-08, + "loss": 0.1853, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09133946895599365, + "step": 6625, + "valid_targets_mean": 3752.2, + "valid_targets_min": 1618 + }, + { + "epoch": 6.914971309337506, + "grad_norm": 0.7009816394599806, + "learning_rate": 1.907970177684737e-08, + "loss": 0.1855, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09786844253540039, + "step": 6630, + "valid_targets_mean": 3573.1, + "valid_targets_min": 1193 + }, + { + "epoch": 6.92018779342723, + "grad_norm": 0.7140309543500324, + "learning_rate": 1.687621967875863e-08, + "loss": 0.1884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0966082364320755, + "step": 6635, + "valid_targets_mean": 4408.8, + "valid_targets_min": 2092 + }, + { + "epoch": 6.925404277516954, + "grad_norm": 0.6492312857310307, + "learning_rate": 1.4807846872113563e-08, + "loss": 0.1862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1101188063621521, + "step": 6640, + "valid_targets_mean": 4397.6, + "valid_targets_min": 2786 + }, + { + "epoch": 6.9306207616066775, + "grad_norm": 0.6136835389474009, + "learning_rate": 1.2874597341534512e-08, + "loss": 0.18, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07504836469888687, + "step": 6645, + "valid_targets_mean": 4088.1, + "valid_targets_min": 1246 + }, + { + "epoch": 6.9358372456964, + "grad_norm": 1.0709838698615528, + "learning_rate": 1.1076484158047962e-08, + "loss": 0.1762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07603618502616882, + "step": 6650, + "valid_targets_mean": 3414.8, + "valid_targets_min": 1484 + }, + { + "epoch": 6.941053729786124, + "grad_norm": 0.6195048640450049, + "learning_rate": 9.413519479004596e-09, + "loss": 0.1863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09652271866798401, + "step": 6655, + "valid_targets_mean": 4064.5, + "valid_targets_min": 1502 + }, + { + "epoch": 6.946270213875848, + "grad_norm": 0.630818911787676, + "learning_rate": 7.885714547990475e-09, + "loss": 0.186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08997681736946106, + "step": 6660, + "valid_targets_mean": 4174.1, + "valid_targets_min": 2134 + }, + { + "epoch": 6.951486697965571, + "grad_norm": 0.8076126650858919, + "learning_rate": 6.493079694753768e-09, + "loss": 0.2063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09069821983575821, + "step": 6665, + "valid_targets_mean": 2993.4, + "valid_targets_min": 1312 + }, + { + "epoch": 6.9567031820552945, + "grad_norm": 0.6833476391317054, + "learning_rate": 5.235624335133693e-09, + "loss": 0.185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0977087914943695, + "step": 6670, + "valid_targets_mean": 4827.2, + "valid_targets_min": 2738 + }, + { + "epoch": 6.961919666145018, + "grad_norm": 0.6462349822477899, + "learning_rate": 4.113356971002791e-09, + "loss": 0.1817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09063562750816345, + "step": 6675, + "valid_targets_mean": 3916.9, + "valid_targets_min": 1759 + }, + { + "epoch": 6.967136150234742, + "grad_norm": 0.6821519218157841, + "learning_rate": 3.126285190195866e-09, + "loss": 0.1833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08925284445285797, + "step": 6680, + "valid_targets_mean": 3551.9, + "valid_targets_min": 2155 + }, + { + "epoch": 6.972352634324466, + "grad_norm": 0.7561240103807355, + "learning_rate": 2.2744156664766813e-09, + "loss": 0.1934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09602029621601105, + "step": 6685, + "valid_targets_mean": 3097.1, + "valid_targets_min": 2048 + }, + { + "epoch": 6.977569118414189, + "grad_norm": 0.723298600326691, + "learning_rate": 1.5577541594802293e-09, + "loss": 0.1925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08091253787279129, + "step": 6690, + "valid_targets_mean": 2883.6, + "valid_targets_min": 1233 + }, + { + "epoch": 6.982785602503912, + "grad_norm": 0.648239467886201, + "learning_rate": 9.7630551467498e-10, + "loss": 0.1903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08107639849185944, + "step": 6695, + "valid_targets_mean": 3688.1, + "valid_targets_min": 1383 + }, + { + "epoch": 6.988002086593636, + "grad_norm": 0.7092619564584375, + "learning_rate": 5.300736633317982e-10, + "loss": 0.1954, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09782654792070389, + "step": 6700, + "valid_targets_mean": 4248.2, + "valid_targets_min": 1800 + }, + { + "epoch": 6.993218570683359, + "grad_norm": 0.6743382973444576, + "learning_rate": 2.1906162250395768e-10, + "loss": 0.1875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08863303065299988, + "step": 6705, + "valid_targets_mean": 3262.4, + "valid_targets_min": 2327 + }, + { + "epoch": 6.998435054773083, + "grad_norm": 0.6877910167761603, + "learning_rate": 4.3271494996055544e-11, + "loss": 0.1902, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10108942538499832, + "step": 6710, + "valid_targets_mean": 3481.8, + "valid_targets_min": 1214 + }, + { + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13761144876480103, + "step": 6712, + "total_flos": 3.076710259797e+18, + "train_loss": 0.10475000183190152, + "train_runtime": 99774.3326, + "train_samples_per_second": 1.076, + "train_steps_per_second": 0.067, + "valid_targets_mean": 5671.5, + "valid_targets_min": 2092 + } + ], + "logging_steps": 5, + "max_steps": 6713, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.076710259797e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}