diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,9749 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 4410, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.007936507936507936, + "grad_norm": 13.936445999727159, + "learning_rate": 3.6281179138322e-07, + "loss": 0.5831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5446114540100098, + "step": 5, + "valid_targets_mean": 5181.8, + "valid_targets_min": 1333 + }, + { + "epoch": 0.015873015873015872, + "grad_norm": 14.14405406674101, + "learning_rate": 8.163265306122449e-07, + "loss": 0.6026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6077768802642822, + "step": 10, + "valid_targets_mean": 5303.6, + "valid_targets_min": 948 + }, + { + "epoch": 0.023809523809523808, + "grad_norm": 13.385853949153486, + "learning_rate": 1.26984126984127e-06, + "loss": 0.5824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5799890160560608, + "step": 15, + "valid_targets_mean": 5301.1, + "valid_targets_min": 1000 + }, + { + "epoch": 0.031746031746031744, + "grad_norm": 10.791756912090966, + "learning_rate": 1.723356009070295e-06, + "loss": 0.5901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5597653985023499, + "step": 20, + "valid_targets_mean": 4823.9, + "valid_targets_min": 263 + }, + { + "epoch": 0.03968253968253968, + "grad_norm": 6.581205621105729, + "learning_rate": 2.17687074829932e-06, + "loss": 0.5323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4685205817222595, + "step": 25, + "valid_targets_mean": 4813.8, + "valid_targets_min": 330 + }, + { + "epoch": 0.047619047619047616, + "grad_norm": 3.7853786833152236, + "learning_rate": 2.6303854875283447e-06, + "loss": 0.4573, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4164578914642334, + "step": 30, + "valid_targets_mean": 5283.2, + "valid_targets_min": 538 + }, + { + "epoch": 0.05555555555555555, + "grad_norm": 2.602330607184181, + "learning_rate": 3.08390022675737e-06, + "loss": 0.423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39225059747695923, + "step": 35, + "valid_targets_mean": 4700.6, + "valid_targets_min": 2390 + }, + { + "epoch": 0.06349206349206349, + "grad_norm": 1.4233023861991225, + "learning_rate": 3.537414965986395e-06, + "loss": 0.3809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35967135429382324, + "step": 40, + "valid_targets_mean": 5445.3, + "valid_targets_min": 657 + }, + { + "epoch": 0.07142857142857142, + "grad_norm": 1.0170115963512993, + "learning_rate": 3.99092970521542e-06, + "loss": 0.402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3903147876262665, + "step": 45, + "valid_targets_mean": 6464.0, + "valid_targets_min": 3085 + }, + { + "epoch": 0.07936507936507936, + "grad_norm": 0.8370897006461435, + "learning_rate": 4.444444444444444e-06, + "loss": 0.3794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33025485277175903, + "step": 50, + "valid_targets_mean": 5718.5, + "valid_targets_min": 975 + }, + { + "epoch": 0.0873015873015873, + "grad_norm": 0.7252658896074926, + "learning_rate": 4.897959183673469e-06, + "loss": 0.3611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34258341789245605, + "step": 55, + "valid_targets_mean": 5999.1, + "valid_targets_min": 989 + }, + { + "epoch": 0.09523809523809523, + "grad_norm": 0.756722250222215, + "learning_rate": 5.3514739229024945e-06, + "loss": 0.3478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3988223075866699, + "step": 60, + "valid_targets_mean": 4506.9, + "valid_targets_min": 290 + }, + { + "epoch": 0.10317460317460317, + "grad_norm": 0.568685223882473, + "learning_rate": 5.80498866213152e-06, + "loss": 0.3448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3268760144710541, + "step": 65, + "valid_targets_mean": 6105.9, + "valid_targets_min": 2717 + }, + { + "epoch": 0.1111111111111111, + "grad_norm": 0.5731290626935265, + "learning_rate": 6.258503401360545e-06, + "loss": 0.3215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3184238374233246, + "step": 70, + "valid_targets_mean": 5088.2, + "valid_targets_min": 420 + }, + { + "epoch": 0.11904761904761904, + "grad_norm": 0.5739988755146004, + "learning_rate": 6.71201814058957e-06, + "loss": 0.3124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34080153703689575, + "step": 75, + "valid_targets_mean": 5674.6, + "valid_targets_min": 1923 + }, + { + "epoch": 0.12698412698412698, + "grad_norm": 0.5021784940463249, + "learning_rate": 7.165532879818595e-06, + "loss": 0.3008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3233030438423157, + "step": 80, + "valid_targets_mean": 5277.2, + "valid_targets_min": 291 + }, + { + "epoch": 0.1349206349206349, + "grad_norm": 0.5521401363464609, + "learning_rate": 7.61904761904762e-06, + "loss": 0.3086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2872031331062317, + "step": 85, + "valid_targets_mean": 5470.0, + "valid_targets_min": 737 + }, + { + "epoch": 0.14285714285714285, + "grad_norm": 0.4751497127812398, + "learning_rate": 8.072562358276645e-06, + "loss": 0.3222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3212955594062805, + "step": 90, + "valid_targets_mean": 6301.1, + "valid_targets_min": 2854 + }, + { + "epoch": 0.15079365079365079, + "grad_norm": 0.43696107468178264, + "learning_rate": 8.52607709750567e-06, + "loss": 0.2895, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2621079683303833, + "step": 95, + "valid_targets_mean": 5617.8, + "valid_targets_min": 266 + }, + { + "epoch": 0.15873015873015872, + "grad_norm": 0.5242468737414164, + "learning_rate": 8.979591836734695e-06, + "loss": 0.3018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3428232669830322, + "step": 100, + "valid_targets_mean": 5715.6, + "valid_targets_min": 2266 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.4723266645052996, + "learning_rate": 9.43310657596372e-06, + "loss": 0.2767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27712035179138184, + "step": 105, + "valid_targets_mean": 5371.8, + "valid_targets_min": 662 + }, + { + "epoch": 0.1746031746031746, + "grad_norm": 0.49793096409078624, + "learning_rate": 9.886621315192746e-06, + "loss": 0.2986, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2675686478614807, + "step": 110, + "valid_targets_mean": 4750.8, + "valid_targets_min": 662 + }, + { + "epoch": 0.18253968253968253, + "grad_norm": 0.4902776042402625, + "learning_rate": 1.034013605442177e-05, + "loss": 0.2876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30433300137519836, + "step": 115, + "valid_targets_mean": 5146.1, + "valid_targets_min": 940 + }, + { + "epoch": 0.19047619047619047, + "grad_norm": 0.465412157163947, + "learning_rate": 1.0793650793650794e-05, + "loss": 0.2969, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2920241057872772, + "step": 120, + "valid_targets_mean": 5453.2, + "valid_targets_min": 841 + }, + { + "epoch": 0.1984126984126984, + "grad_norm": 0.4355039639514751, + "learning_rate": 1.124716553287982e-05, + "loss": 0.2503, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2529776692390442, + "step": 125, + "valid_targets_mean": 5727.6, + "valid_targets_min": 2696 + }, + { + "epoch": 0.20634920634920634, + "grad_norm": 0.4996682572655225, + "learning_rate": 1.1700680272108845e-05, + "loss": 0.2826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2821199297904968, + "step": 130, + "valid_targets_mean": 5396.6, + "valid_targets_min": 2020 + }, + { + "epoch": 0.21428571428571427, + "grad_norm": 0.43094951290178424, + "learning_rate": 1.215419501133787e-05, + "loss": 0.2583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24614138901233673, + "step": 135, + "valid_targets_mean": 5842.7, + "valid_targets_min": 286 + }, + { + "epoch": 0.2222222222222222, + "grad_norm": 0.47723431745558176, + "learning_rate": 1.2607709750566895e-05, + "loss": 0.2845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2890702486038208, + "step": 140, + "valid_targets_mean": 5354.1, + "valid_targets_min": 2307 + }, + { + "epoch": 0.23015873015873015, + "grad_norm": 0.4617350150011481, + "learning_rate": 1.3061224489795918e-05, + "loss": 0.2586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2510530948638916, + "step": 145, + "valid_targets_mean": 5552.1, + "valid_targets_min": 608 + }, + { + "epoch": 0.23809523809523808, + "grad_norm": 0.45824070397194494, + "learning_rate": 1.3514739229024945e-05, + "loss": 0.2652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26986801624298096, + "step": 150, + "valid_targets_mean": 5441.2, + "valid_targets_min": 1175 + }, + { + "epoch": 0.24603174603174602, + "grad_norm": 0.4392573652714541, + "learning_rate": 1.3968253968253968e-05, + "loss": 0.2548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2496776133775711, + "step": 155, + "valid_targets_mean": 5731.4, + "valid_targets_min": 1393 + }, + { + "epoch": 0.25396825396825395, + "grad_norm": 0.4536964041682826, + "learning_rate": 1.4421768707482994e-05, + "loss": 0.2374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21673960983753204, + "step": 160, + "valid_targets_mean": 6079.4, + "valid_targets_min": 300 + }, + { + "epoch": 0.2619047619047619, + "grad_norm": 0.5619742860376928, + "learning_rate": 1.4875283446712018e-05, + "loss": 0.2578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2804248631000519, + "step": 165, + "valid_targets_mean": 5392.2, + "valid_targets_min": 2214 + }, + { + "epoch": 0.2698412698412698, + "grad_norm": 0.4588222248723898, + "learning_rate": 1.5328798185941044e-05, + "loss": 0.2643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24595819413661957, + "step": 170, + "valid_targets_mean": 5367.2, + "valid_targets_min": 1806 + }, + { + "epoch": 0.2777777777777778, + "grad_norm": 0.4837210875448078, + "learning_rate": 1.578231292517007e-05, + "loss": 0.2465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26808232069015503, + "step": 175, + "valid_targets_mean": 4694.2, + "valid_targets_min": 1970 + }, + { + "epoch": 0.2857142857142857, + "grad_norm": 0.4642135470304616, + "learning_rate": 1.6235827664399097e-05, + "loss": 0.2568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25557032227516174, + "step": 180, + "valid_targets_mean": 5724.9, + "valid_targets_min": 1981 + }, + { + "epoch": 0.29365079365079366, + "grad_norm": 0.4404678599566564, + "learning_rate": 1.668934240362812e-05, + "loss": 0.2319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2403090000152588, + "step": 185, + "valid_targets_mean": 5580.2, + "valid_targets_min": 1807 + }, + { + "epoch": 0.30158730158730157, + "grad_norm": 0.461513428421082, + "learning_rate": 1.7142857142857142e-05, + "loss": 0.2717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2342744767665863, + "step": 190, + "valid_targets_mean": 5209.1, + "valid_targets_min": 528 + }, + { + "epoch": 0.30952380952380953, + "grad_norm": 0.4616028237565255, + "learning_rate": 1.759637188208617e-05, + "loss": 0.2573, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2381066232919693, + "step": 195, + "valid_targets_mean": 4881.2, + "valid_targets_min": 1007 + }, + { + "epoch": 0.31746031746031744, + "grad_norm": 0.5048131717477712, + "learning_rate": 1.8049886621315194e-05, + "loss": 0.2497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24961230158805847, + "step": 200, + "valid_targets_mean": 4766.8, + "valid_targets_min": 760 + }, + { + "epoch": 0.3253968253968254, + "grad_norm": 0.4608218526938904, + "learning_rate": 1.8503401360544218e-05, + "loss": 0.2423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22085292637348175, + "step": 205, + "valid_targets_mean": 5626.6, + "valid_targets_min": 311 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.5276409706041367, + "learning_rate": 1.8956916099773243e-05, + "loss": 0.2558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2512398362159729, + "step": 210, + "valid_targets_mean": 5207.8, + "valid_targets_min": 901 + }, + { + "epoch": 0.3412698412698413, + "grad_norm": 0.44125944724862465, + "learning_rate": 1.941043083900227e-05, + "loss": 0.2319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23039595782756805, + "step": 215, + "valid_targets_mean": 6150.5, + "valid_targets_min": 2819 + }, + { + "epoch": 0.3492063492063492, + "grad_norm": 0.8225576873165226, + "learning_rate": 1.9863945578231295e-05, + "loss": 0.2654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2745077311992645, + "step": 220, + "valid_targets_mean": 4586.4, + "valid_targets_min": 889 + }, + { + "epoch": 0.35714285714285715, + "grad_norm": 0.44559302418983976, + "learning_rate": 2.031746031746032e-05, + "loss": 0.2447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27263548970222473, + "step": 225, + "valid_targets_mean": 5901.4, + "valid_targets_min": 1813 + }, + { + "epoch": 0.36507936507936506, + "grad_norm": 0.4702793229346846, + "learning_rate": 2.0770975056689343e-05, + "loss": 0.2457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2387492060661316, + "step": 230, + "valid_targets_mean": 5740.0, + "valid_targets_min": 1593 + }, + { + "epoch": 0.373015873015873, + "grad_norm": 0.5771251589812603, + "learning_rate": 2.122448979591837e-05, + "loss": 0.2577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27772706747055054, + "step": 235, + "valid_targets_mean": 5892.1, + "valid_targets_min": 807 + }, + { + "epoch": 0.38095238095238093, + "grad_norm": 0.4716600277551803, + "learning_rate": 2.1678004535147395e-05, + "loss": 0.2366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24430419504642487, + "step": 240, + "valid_targets_mean": 5370.8, + "valid_targets_min": 300 + }, + { + "epoch": 0.3888888888888889, + "grad_norm": 0.4834321959641482, + "learning_rate": 2.213151927437642e-05, + "loss": 0.2335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2554604709148407, + "step": 245, + "valid_targets_mean": 5334.6, + "valid_targets_min": 800 + }, + { + "epoch": 0.3968253968253968, + "grad_norm": 0.4468651198346908, + "learning_rate": 2.2585034013605444e-05, + "loss": 0.2326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22178535163402557, + "step": 250, + "valid_targets_mean": 5966.1, + "valid_targets_min": 1724 + }, + { + "epoch": 0.40476190476190477, + "grad_norm": 0.4750302614876995, + "learning_rate": 2.3038548752834472e-05, + "loss": 0.2402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26613864302635193, + "step": 255, + "valid_targets_mean": 5677.9, + "valid_targets_min": 1011 + }, + { + "epoch": 0.4126984126984127, + "grad_norm": 0.4468674852795431, + "learning_rate": 2.3492063492063496e-05, + "loss": 0.2425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2535470724105835, + "step": 260, + "valid_targets_mean": 5571.2, + "valid_targets_min": 3116 + }, + { + "epoch": 0.42063492063492064, + "grad_norm": 0.4435957536640986, + "learning_rate": 2.394557823129252e-05, + "loss": 0.2403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24253222346305847, + "step": 265, + "valid_targets_mean": 5520.6, + "valid_targets_min": 2481 + }, + { + "epoch": 0.42857142857142855, + "grad_norm": 0.48192905697263166, + "learning_rate": 2.439909297052154e-05, + "loss": 0.2405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22654183208942413, + "step": 270, + "valid_targets_mean": 5040.6, + "valid_targets_min": 1020 + }, + { + "epoch": 0.4365079365079365, + "grad_norm": 0.4654909902022295, + "learning_rate": 2.4852607709750566e-05, + "loss": 0.2397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22321158647537231, + "step": 275, + "valid_targets_mean": 4938.6, + "valid_targets_min": 554 + }, + { + "epoch": 0.4444444444444444, + "grad_norm": 0.47136311558318184, + "learning_rate": 2.5306122448979597e-05, + "loss": 0.256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25398164987564087, + "step": 280, + "valid_targets_mean": 5719.1, + "valid_targets_min": 1427 + }, + { + "epoch": 0.4523809523809524, + "grad_norm": 0.5143544056685512, + "learning_rate": 2.5759637188208618e-05, + "loss": 0.2538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28001242876052856, + "step": 285, + "valid_targets_mean": 4379.2, + "valid_targets_min": 275 + }, + { + "epoch": 0.4603174603174603, + "grad_norm": 0.40992433170257936, + "learning_rate": 2.6213151927437642e-05, + "loss": 0.2107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19944259524345398, + "step": 290, + "valid_targets_mean": 6113.2, + "valid_targets_min": 1496 + }, + { + "epoch": 0.46825396825396826, + "grad_norm": 0.7108778765163672, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.2316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2676636576652527, + "step": 295, + "valid_targets_mean": 4878.0, + "valid_targets_min": 1397 + }, + { + "epoch": 0.47619047619047616, + "grad_norm": 0.7424501325702221, + "learning_rate": 2.7120181405895694e-05, + "loss": 0.2345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27534204721450806, + "step": 300, + "valid_targets_mean": 4737.1, + "valid_targets_min": 1195 + }, + { + "epoch": 0.48412698412698413, + "grad_norm": 0.4554032952042926, + "learning_rate": 2.757369614512472e-05, + "loss": 0.2423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23061871528625488, + "step": 305, + "valid_targets_mean": 5816.9, + "valid_targets_min": 935 + }, + { + "epoch": 0.49206349206349204, + "grad_norm": 0.43918142641279273, + "learning_rate": 2.8027210884353743e-05, + "loss": 0.246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2266923487186432, + "step": 310, + "valid_targets_mean": 5512.4, + "valid_targets_min": 674 + }, + { + "epoch": 0.5, + "grad_norm": 0.46398515425879433, + "learning_rate": 2.8480725623582767e-05, + "loss": 0.2234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21582689881324768, + "step": 315, + "valid_targets_mean": 5874.5, + "valid_targets_min": 1816 + }, + { + "epoch": 0.5079365079365079, + "grad_norm": 0.4043326688329922, + "learning_rate": 2.893424036281179e-05, + "loss": 0.2239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21339790523052216, + "step": 320, + "valid_targets_mean": 5820.1, + "valid_targets_min": 282 + }, + { + "epoch": 0.5158730158730159, + "grad_norm": 0.502316882086023, + "learning_rate": 2.938775510204082e-05, + "loss": 0.2237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20219871401786804, + "step": 325, + "valid_targets_mean": 5578.6, + "valid_targets_min": 1540 + }, + { + "epoch": 0.5238095238095238, + "grad_norm": 0.9835515697061404, + "learning_rate": 2.9841269841269844e-05, + "loss": 0.2323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2298191487789154, + "step": 330, + "valid_targets_mean": 4911.0, + "valid_targets_min": 632 + }, + { + "epoch": 0.5317460317460317, + "grad_norm": 0.4647735663644707, + "learning_rate": 3.0294784580498868e-05, + "loss": 0.2339, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22781239449977875, + "step": 335, + "valid_targets_mean": 5532.8, + "valid_targets_min": 1535 + }, + { + "epoch": 0.5396825396825397, + "grad_norm": 0.5438389697935898, + "learning_rate": 3.074829931972789e-05, + "loss": 0.2375, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22640562057495117, + "step": 340, + "valid_targets_mean": 4285.8, + "valid_targets_min": 727 + }, + { + "epoch": 0.5476190476190477, + "grad_norm": 0.4334285919558895, + "learning_rate": 3.1201814058956924e-05, + "loss": 0.2399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21913409233093262, + "step": 345, + "valid_targets_mean": 5222.7, + "valid_targets_min": 285 + }, + { + "epoch": 0.5555555555555556, + "grad_norm": 0.4738405937337707, + "learning_rate": 3.1655328798185945e-05, + "loss": 0.2378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25035279989242554, + "step": 350, + "valid_targets_mean": 5209.8, + "valid_targets_min": 833 + }, + { + "epoch": 0.5634920634920635, + "grad_norm": 0.4555272170875938, + "learning_rate": 3.2108843537414965e-05, + "loss": 0.2223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21295906603336334, + "step": 355, + "valid_targets_mean": 4887.6, + "valid_targets_min": 845 + }, + { + "epoch": 0.5714285714285714, + "grad_norm": 0.4607956603180466, + "learning_rate": 3.256235827664399e-05, + "loss": 0.2242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23422232270240784, + "step": 360, + "valid_targets_mean": 5982.9, + "valid_targets_min": 3791 + }, + { + "epoch": 0.5793650793650794, + "grad_norm": 0.46019446827222377, + "learning_rate": 3.3015873015873014e-05, + "loss": 0.234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21903209388256073, + "step": 365, + "valid_targets_mean": 5330.9, + "valid_targets_min": 2257 + }, + { + "epoch": 0.5873015873015873, + "grad_norm": 0.6042052076393369, + "learning_rate": 3.346938775510204e-05, + "loss": 0.2358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2550661861896515, + "step": 370, + "valid_targets_mean": 4556.8, + "valid_targets_min": 763 + }, + { + "epoch": 0.5952380952380952, + "grad_norm": 0.44853135001478606, + "learning_rate": 3.392290249433107e-05, + "loss": 0.2178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.202229842543602, + "step": 375, + "valid_targets_mean": 5618.4, + "valid_targets_min": 287 + }, + { + "epoch": 0.6031746031746031, + "grad_norm": 0.4114084841092905, + "learning_rate": 3.437641723356009e-05, + "loss": 0.2336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.208021879196167, + "step": 380, + "valid_targets_mean": 6323.6, + "valid_targets_min": 3070 + }, + { + "epoch": 0.6111111111111112, + "grad_norm": 0.4907016504547875, + "learning_rate": 3.482993197278912e-05, + "loss": 0.2096, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2262699007987976, + "step": 385, + "valid_targets_mean": 5458.8, + "valid_targets_min": 1233 + }, + { + "epoch": 0.6190476190476191, + "grad_norm": 0.4470894371245897, + "learning_rate": 3.5283446712018146e-05, + "loss": 0.2147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2284412682056427, + "step": 390, + "valid_targets_mean": 5275.5, + "valid_targets_min": 614 + }, + { + "epoch": 0.626984126984127, + "grad_norm": 0.8292394623205982, + "learning_rate": 3.573696145124717e-05, + "loss": 0.2177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23321247100830078, + "step": 395, + "valid_targets_mean": 4546.7, + "valid_targets_min": 1120 + }, + { + "epoch": 0.6349206349206349, + "grad_norm": 0.5101519952218712, + "learning_rate": 3.6190476190476195e-05, + "loss": 0.2029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22552572190761566, + "step": 400, + "valid_targets_mean": 4713.0, + "valid_targets_min": 403 + }, + { + "epoch": 0.6428571428571429, + "grad_norm": 0.4782175383328679, + "learning_rate": 3.6643990929705216e-05, + "loss": 0.2336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22066287696361542, + "step": 405, + "valid_targets_mean": 5557.4, + "valid_targets_min": 1431 + }, + { + "epoch": 0.6507936507936508, + "grad_norm": 0.6813010668498629, + "learning_rate": 3.7097505668934243e-05, + "loss": 0.2304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23460529744625092, + "step": 410, + "valid_targets_mean": 4301.2, + "valid_targets_min": 290 + }, + { + "epoch": 0.6587301587301587, + "grad_norm": 0.42762135214795316, + "learning_rate": 3.755102040816327e-05, + "loss": 0.2218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22290262579917908, + "step": 415, + "valid_targets_mean": 5696.5, + "valid_targets_min": 1242 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.7113003892096883, + "learning_rate": 3.800453514739229e-05, + "loss": 0.2307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24848781526088715, + "step": 420, + "valid_targets_mean": 5861.1, + "valid_targets_min": 1829 + }, + { + "epoch": 0.6746031746031746, + "grad_norm": 0.4223786613285171, + "learning_rate": 3.845804988662132e-05, + "loss": 0.2361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23021358251571655, + "step": 425, + "valid_targets_mean": 5603.6, + "valid_targets_min": 2005 + }, + { + "epoch": 0.6825396825396826, + "grad_norm": 0.46482319003042916, + "learning_rate": 3.891156462585034e-05, + "loss": 0.2369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2257668375968933, + "step": 430, + "valid_targets_mean": 5665.9, + "valid_targets_min": 936 + }, + { + "epoch": 0.6904761904761905, + "grad_norm": 0.49517195170111594, + "learning_rate": 3.936507936507937e-05, + "loss": 0.2219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24368128180503845, + "step": 435, + "valid_targets_mean": 5276.9, + "valid_targets_min": 965 + }, + { + "epoch": 0.6984126984126984, + "grad_norm": 0.4533449896226927, + "learning_rate": 3.9818594104308396e-05, + "loss": 0.2227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2182699739933014, + "step": 440, + "valid_targets_mean": 5190.2, + "valid_targets_min": 609 + }, + { + "epoch": 0.7063492063492064, + "grad_norm": 0.47604972418524916, + "learning_rate": 3.999994361288785e-05, + "loss": 0.2316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27464592456817627, + "step": 445, + "valid_targets_mean": 5377.2, + "valid_targets_min": 299 + }, + { + "epoch": 0.7142857142857143, + "grad_norm": 0.4843309348827549, + "learning_rate": 3.9999599026131644e-05, + "loss": 0.2078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21664546430110931, + "step": 450, + "valid_targets_mean": 5402.9, + "valid_targets_min": 1359 + }, + { + "epoch": 0.7222222222222222, + "grad_norm": 0.4382635205661119, + "learning_rate": 3.999894118418342e-05, + "loss": 0.2176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22355255484580994, + "step": 455, + "valid_targets_mean": 5052.1, + "valid_targets_min": 2069 + }, + { + "epoch": 0.7301587301587301, + "grad_norm": 0.472980100407786, + "learning_rate": 3.999797009734697e-05, + "loss": 0.2286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22827665507793427, + "step": 460, + "valid_targets_mean": 5404.2, + "valid_targets_min": 1744 + }, + { + "epoch": 0.7380952380952381, + "grad_norm": 0.43558241143554155, + "learning_rate": 3.999668578083253e-05, + "loss": 0.2344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22950908541679382, + "step": 465, + "valid_targets_mean": 5786.8, + "valid_targets_min": 688 + }, + { + "epoch": 0.746031746031746, + "grad_norm": 0.39301407866116134, + "learning_rate": 3.9995088254756434e-05, + "loss": 0.2171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20144161581993103, + "step": 470, + "valid_targets_mean": 6067.9, + "valid_targets_min": 3477 + }, + { + "epoch": 0.753968253968254, + "grad_norm": 0.4335866805247727, + "learning_rate": 3.999317754414084e-05, + "loss": 0.2162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24468550086021423, + "step": 475, + "valid_targets_mean": 5383.6, + "valid_targets_min": 297 + }, + { + "epoch": 0.7619047619047619, + "grad_norm": 0.43530076929025974, + "learning_rate": 3.999095367891337e-05, + "loss": 0.2064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2087598741054535, + "step": 480, + "valid_targets_mean": 5412.7, + "valid_targets_min": 241 + }, + { + "epoch": 0.7698412698412699, + "grad_norm": 0.42581385510950404, + "learning_rate": 3.9988416693906563e-05, + "loss": 0.2149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20418357849121094, + "step": 485, + "valid_targets_mean": 5400.4, + "valid_targets_min": 618 + }, + { + "epoch": 0.7777777777777778, + "grad_norm": 0.6201401443531581, + "learning_rate": 3.9985566628857425e-05, + "loss": 0.2286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22904717922210693, + "step": 490, + "valid_targets_mean": 5283.9, + "valid_targets_min": 1641 + }, + { + "epoch": 0.7857142857142857, + "grad_norm": 0.40573600003328514, + "learning_rate": 3.998240352840672e-05, + "loss": 0.2369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19750481843948364, + "step": 495, + "valid_targets_mean": 5820.5, + "valid_targets_min": 2527 + }, + { + "epoch": 0.7936507936507936, + "grad_norm": 0.6282176812699868, + "learning_rate": 3.997892744209833e-05, + "loss": 0.2157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21052823960781097, + "step": 500, + "valid_targets_mean": 3286.4, + "valid_targets_min": 284 + }, + { + "epoch": 0.8015873015873016, + "grad_norm": 0.4150664416041001, + "learning_rate": 3.997513842437845e-05, + "loss": 0.2168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20114190876483917, + "step": 505, + "valid_targets_mean": 6746.6, + "valid_targets_min": 3503 + }, + { + "epoch": 0.8095238095238095, + "grad_norm": 0.5580131451084269, + "learning_rate": 3.997103653459475e-05, + "loss": 0.2255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21351292729377747, + "step": 510, + "valid_targets_mean": 5427.2, + "valid_targets_min": 1310 + }, + { + "epoch": 0.8174603174603174, + "grad_norm": 0.4539249289508295, + "learning_rate": 3.996662183699541e-05, + "loss": 0.2029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22894957661628723, + "step": 515, + "valid_targets_mean": 6587.3, + "valid_targets_min": 2987 + }, + { + "epoch": 0.8253968253968254, + "grad_norm": 0.4606135797915741, + "learning_rate": 3.996189440072818e-05, + "loss": 0.2235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23994050920009613, + "step": 520, + "valid_targets_mean": 5603.4, + "valid_targets_min": 963 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.46485471964434133, + "learning_rate": 3.9956854299839246e-05, + "loss": 0.2399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24385607242584229, + "step": 525, + "valid_targets_mean": 5554.9, + "valid_targets_min": 1998 + }, + { + "epoch": 0.8412698412698413, + "grad_norm": 0.5070540327503203, + "learning_rate": 3.9951501613272076e-05, + "loss": 0.2238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2311379760503769, + "step": 530, + "valid_targets_mean": 5792.5, + "valid_targets_min": 2950 + }, + { + "epoch": 0.8492063492063492, + "grad_norm": 0.4509301237412979, + "learning_rate": 3.994583642486618e-05, + "loss": 0.2155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22014960646629333, + "step": 535, + "valid_targets_mean": 5551.4, + "valid_targets_min": 2454 + }, + { + "epoch": 0.8571428571428571, + "grad_norm": 0.37450581843880576, + "learning_rate": 3.993985882335584e-05, + "loss": 0.2102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17562690377235413, + "step": 540, + "valid_targets_mean": 6399.9, + "valid_targets_min": 865 + }, + { + "epoch": 0.8650793650793651, + "grad_norm": 0.4181520509392521, + "learning_rate": 3.993356890236866e-05, + "loss": 0.2178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1997915804386139, + "step": 545, + "valid_targets_mean": 5282.3, + "valid_targets_min": 307 + }, + { + "epoch": 0.873015873015873, + "grad_norm": 0.40651126602493426, + "learning_rate": 3.992696676042414e-05, + "loss": 0.2135, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20215590298175812, + "step": 550, + "valid_targets_mean": 6478.2, + "valid_targets_min": 1253 + }, + { + "epoch": 0.8809523809523809, + "grad_norm": 0.42510576646915893, + "learning_rate": 3.992005250093211e-05, + "loss": 0.2162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20907750725746155, + "step": 555, + "valid_targets_mean": 5629.7, + "valid_targets_min": 1539 + }, + { + "epoch": 0.8888888888888888, + "grad_norm": 0.4812454511222598, + "learning_rate": 3.991282623219113e-05, + "loss": 0.2149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22547686100006104, + "step": 560, + "valid_targets_mean": 4727.8, + "valid_targets_min": 1324 + }, + { + "epoch": 0.8968253968253969, + "grad_norm": 0.4203217407948123, + "learning_rate": 3.9905288067386776e-05, + "loss": 0.2062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21304470300674438, + "step": 565, + "valid_targets_mean": 5743.1, + "valid_targets_min": 2659 + }, + { + "epoch": 0.9047619047619048, + "grad_norm": 0.5536262458911827, + "learning_rate": 3.989743812458987e-05, + "loss": 0.2175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22015127539634705, + "step": 570, + "valid_targets_mean": 5742.7, + "valid_targets_min": 501 + }, + { + "epoch": 0.9126984126984127, + "grad_norm": 0.44923723778131375, + "learning_rate": 3.9889276526754664e-05, + "loss": 0.2187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2490832507610321, + "step": 575, + "valid_targets_mean": 5566.1, + "valid_targets_min": 906 + }, + { + "epoch": 0.9206349206349206, + "grad_norm": 0.4833832932237736, + "learning_rate": 3.988080340171685e-05, + "loss": 0.2229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2266359031200409, + "step": 580, + "valid_targets_mean": 4794.4, + "valid_targets_min": 510 + }, + { + "epoch": 0.9285714285714286, + "grad_norm": 0.5314150951022422, + "learning_rate": 3.987201888219161e-05, + "loss": 0.2096, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23443414270877838, + "step": 585, + "valid_targets_mean": 3981.2, + "valid_targets_min": 577 + }, + { + "epoch": 0.9365079365079365, + "grad_norm": 0.44573566583926183, + "learning_rate": 3.986292310577153e-05, + "loss": 0.2096, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1997448205947876, + "step": 590, + "valid_targets_mean": 5087.9, + "valid_targets_min": 256 + }, + { + "epoch": 0.9444444444444444, + "grad_norm": 0.4823118351901083, + "learning_rate": 3.9853516214924416e-05, + "loss": 0.2362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21190452575683594, + "step": 595, + "valid_targets_mean": 4769.4, + "valid_targets_min": 1789 + }, + { + "epoch": 0.9523809523809523, + "grad_norm": 0.9701252803781317, + "learning_rate": 3.9843798356991096e-05, + "loss": 0.2033, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21730470657348633, + "step": 600, + "valid_targets_mean": 5590.2, + "valid_targets_min": 871 + }, + { + "epoch": 0.9603174603174603, + "grad_norm": 0.8528374645310175, + "learning_rate": 3.9833769684183104e-05, + "loss": 0.2378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25169217586517334, + "step": 605, + "valid_targets_mean": 5481.6, + "valid_targets_min": 648 + }, + { + "epoch": 0.9682539682539683, + "grad_norm": 0.5719512687258413, + "learning_rate": 3.982343035358026e-05, + "loss": 0.2426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25562554597854614, + "step": 610, + "valid_targets_mean": 4258.5, + "valid_targets_min": 852 + }, + { + "epoch": 0.9761904761904762, + "grad_norm": 0.42417870906303506, + "learning_rate": 3.981278052712827e-05, + "loss": 0.2186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19715425372123718, + "step": 615, + "valid_targets_mean": 6383.4, + "valid_targets_min": 2557 + }, + { + "epoch": 0.9841269841269841, + "grad_norm": 0.5592731470756656, + "learning_rate": 3.9801820371636157e-05, + "loss": 0.2205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21826912462711334, + "step": 620, + "valid_targets_mean": 5242.9, + "valid_targets_min": 540 + }, + { + "epoch": 0.9920634920634921, + "grad_norm": 0.45536035098972394, + "learning_rate": 3.979055005877364e-05, + "loss": 0.2103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23044517636299133, + "step": 625, + "valid_targets_mean": 6434.3, + "valid_targets_min": 1909 + }, + { + "epoch": 1.0, + "grad_norm": 0.45688834618759994, + "learning_rate": 3.977896976506845e-05, + "loss": 0.2262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20650140941143036, + "step": 630, + "valid_targets_mean": 4983.3, + "valid_targets_min": 920 + }, + { + "epoch": 1.007936507936508, + "grad_norm": 0.45783302985716545, + "learning_rate": 3.976707967190358e-05, + "loss": 0.2009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1878741830587387, + "step": 635, + "valid_targets_mean": 6036.8, + "valid_targets_min": 1419 + }, + { + "epoch": 1.0158730158730158, + "grad_norm": 0.3696615133926402, + "learning_rate": 3.9754879965514456e-05, + "loss": 0.2187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17767128348350525, + "step": 640, + "valid_targets_mean": 6949.4, + "valid_targets_min": 1382 + }, + { + "epoch": 1.0238095238095237, + "grad_norm": 0.5301335563601449, + "learning_rate": 3.9742370836985956e-05, + "loss": 0.1955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18509696424007416, + "step": 645, + "valid_targets_mean": 5665.9, + "valid_targets_min": 3373 + }, + { + "epoch": 1.0317460317460316, + "grad_norm": 0.45339150425457114, + "learning_rate": 3.972955248224949e-05, + "loss": 0.1957, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19851231575012207, + "step": 650, + "valid_targets_mean": 5496.2, + "valid_targets_min": 1700 + }, + { + "epoch": 1.0396825396825398, + "grad_norm": 0.4563819518280238, + "learning_rate": 3.971642510207989e-05, + "loss": 0.1999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20360763370990753, + "step": 655, + "valid_targets_mean": 5350.4, + "valid_targets_min": 333 + }, + { + "epoch": 1.0476190476190477, + "grad_norm": 0.5625847542647227, + "learning_rate": 3.9702988902092274e-05, + "loss": 0.2, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1898064911365509, + "step": 660, + "valid_targets_mean": 5748.2, + "valid_targets_min": 1540 + }, + { + "epoch": 1.0555555555555556, + "grad_norm": 0.425299703122137, + "learning_rate": 3.968924409273884e-05, + "loss": 0.2059, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19356761872768402, + "step": 665, + "valid_targets_mean": 5209.3, + "valid_targets_min": 1985 + }, + { + "epoch": 1.0634920634920635, + "grad_norm": 0.47286769247229504, + "learning_rate": 3.9675190889305545e-05, + "loss": 0.2005, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1817602813243866, + "step": 670, + "valid_targets_mean": 5672.6, + "valid_targets_min": 959 + }, + { + "epoch": 1.0714285714285714, + "grad_norm": 0.432773210737844, + "learning_rate": 3.966082951190874e-05, + "loss": 0.1861, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2104063630104065, + "step": 675, + "valid_targets_mean": 6248.0, + "valid_targets_min": 1230 + }, + { + "epoch": 1.0793650793650793, + "grad_norm": 0.4706439453673335, + "learning_rate": 3.9646160185491756e-05, + "loss": 0.201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18827101588249207, + "step": 680, + "valid_targets_mean": 4876.6, + "valid_targets_min": 1043 + }, + { + "epoch": 1.0873015873015872, + "grad_norm": 0.5338851223858002, + "learning_rate": 3.963118313982131e-05, + "loss": 0.1941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18436592817306519, + "step": 685, + "valid_targets_mean": 4361.0, + "valid_targets_min": 1130 + }, + { + "epoch": 1.0952380952380953, + "grad_norm": 0.41520798715060503, + "learning_rate": 3.961589860948399e-05, + "loss": 0.1961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21202310919761658, + "step": 690, + "valid_targets_mean": 5486.4, + "valid_targets_min": 1888 + }, + { + "epoch": 1.1031746031746033, + "grad_norm": 0.42676794941537016, + "learning_rate": 3.960030683388251e-05, + "loss": 0.2056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17921066284179688, + "step": 695, + "valid_targets_mean": 5590.2, + "valid_targets_min": 217 + }, + { + "epoch": 1.1111111111111112, + "grad_norm": 0.40164984647020757, + "learning_rate": 3.9584408057232e-05, + "loss": 0.1928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17369890213012695, + "step": 700, + "valid_targets_mean": 5022.5, + "valid_targets_min": 268 + }, + { + "epoch": 1.119047619047619, + "grad_norm": 0.4557865055131249, + "learning_rate": 3.956820252855618e-05, + "loss": 0.207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21178218722343445, + "step": 705, + "valid_targets_mean": 5212.6, + "valid_targets_min": 1650 + }, + { + "epoch": 1.126984126984127, + "grad_norm": 0.4737440529809228, + "learning_rate": 3.955169050168343e-05, + "loss": 0.1974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18235670030117035, + "step": 710, + "valid_targets_mean": 4897.6, + "valid_targets_min": 1233 + }, + { + "epoch": 1.1349206349206349, + "grad_norm": 0.36301667189206566, + "learning_rate": 3.953487223524283e-05, + "loss": 0.1911, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15816360712051392, + "step": 715, + "valid_targets_mean": 6158.5, + "valid_targets_min": 2000 + }, + { + "epoch": 1.1428571428571428, + "grad_norm": 0.4512962324237001, + "learning_rate": 3.951774799266014e-05, + "loss": 0.1978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1824280321598053, + "step": 720, + "valid_targets_mean": 5561.8, + "valid_targets_min": 2560 + }, + { + "epoch": 1.1507936507936507, + "grad_norm": 0.4511979075727613, + "learning_rate": 3.950031804215364e-05, + "loss": 0.1984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2006044089794159, + "step": 725, + "valid_targets_mean": 5573.5, + "valid_targets_min": 1633 + }, + { + "epoch": 1.1587301587301586, + "grad_norm": 0.42676374430180725, + "learning_rate": 3.948258265672991e-05, + "loss": 0.2072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21262499690055847, + "step": 730, + "valid_targets_mean": 5894.2, + "valid_targets_min": 513 + }, + { + "epoch": 1.1666666666666667, + "grad_norm": 0.669542399200941, + "learning_rate": 3.946454211417961e-05, + "loss": 0.1946, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20334014296531677, + "step": 735, + "valid_targets_mean": 5074.4, + "valid_targets_min": 690 + }, + { + "epoch": 1.1746031746031746, + "grad_norm": 0.45162646285557456, + "learning_rate": 3.944619669707309e-05, + "loss": 0.2193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.215205579996109, + "step": 740, + "valid_targets_mean": 5439.4, + "valid_targets_min": 1907 + }, + { + "epoch": 1.1825396825396826, + "grad_norm": 0.4710256071640483, + "learning_rate": 3.9427546692755946e-05, + "loss": 0.2082, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21397551894187927, + "step": 745, + "valid_targets_mean": 5276.3, + "valid_targets_min": 1098 + }, + { + "epoch": 1.1904761904761905, + "grad_norm": 0.4625751950531012, + "learning_rate": 3.9408592393344596e-05, + "loss": 0.2041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19521868228912354, + "step": 750, + "valid_targets_mean": 5905.4, + "valid_targets_min": 2861 + }, + { + "epoch": 1.1984126984126984, + "grad_norm": 0.4677554812825755, + "learning_rate": 3.9389334095721606e-05, + "loss": 0.2025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21653307974338531, + "step": 755, + "valid_targets_mean": 4797.6, + "valid_targets_min": 895 + }, + { + "epoch": 1.2063492063492063, + "grad_norm": 0.546242046347798, + "learning_rate": 3.936977210153113e-05, + "loss": 0.1932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20400574803352356, + "step": 760, + "valid_targets_mean": 5519.1, + "valid_targets_min": 896 + }, + { + "epoch": 1.2142857142857142, + "grad_norm": 0.4204392123363325, + "learning_rate": 3.93499067171741e-05, + "loss": 0.2077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22943107783794403, + "step": 765, + "valid_targets_mean": 6420.3, + "valid_targets_min": 1329 + }, + { + "epoch": 1.2222222222222223, + "grad_norm": 0.4682155865822684, + "learning_rate": 3.932973825380351e-05, + "loss": 0.1994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22972063720226288, + "step": 770, + "valid_targets_mean": 4946.4, + "valid_targets_min": 933 + }, + { + "epoch": 1.2301587301587302, + "grad_norm": 0.5301139897940557, + "learning_rate": 3.9309267027319485e-05, + "loss": 0.2052, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23306161165237427, + "step": 775, + "valid_targets_mean": 3961.6, + "valid_targets_min": 222 + }, + { + "epoch": 1.2380952380952381, + "grad_norm": 0.40365821128282314, + "learning_rate": 3.928849335836435e-05, + "loss": 0.1902, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18574738502502441, + "step": 780, + "valid_targets_mean": 5806.0, + "valid_targets_min": 2320 + }, + { + "epoch": 1.246031746031746, + "grad_norm": 0.47813886067964756, + "learning_rate": 3.926741757231761e-05, + "loss": 0.1966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21044926345348358, + "step": 785, + "valid_targets_mean": 5299.6, + "valid_targets_min": 1723 + }, + { + "epoch": 1.253968253968254, + "grad_norm": 0.40253903041192324, + "learning_rate": 3.924603999929086e-05, + "loss": 0.2169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2096974402666092, + "step": 790, + "valid_targets_mean": 5558.8, + "valid_targets_min": 543 + }, + { + "epoch": 1.2619047619047619, + "grad_norm": 0.4549298029596003, + "learning_rate": 3.9224360974122584e-05, + "loss": 0.2109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23242492973804474, + "step": 795, + "valid_targets_mean": 5270.6, + "valid_targets_min": 2212 + }, + { + "epoch": 1.2698412698412698, + "grad_norm": 0.4695471820608219, + "learning_rate": 3.920238083637297e-05, + "loss": 0.199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20831601321697235, + "step": 800, + "valid_targets_mean": 5308.8, + "valid_targets_min": 883 + }, + { + "epoch": 1.2777777777777777, + "grad_norm": 0.7642610362979803, + "learning_rate": 3.9180099930318524e-05, + "loss": 0.2037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1991654336452484, + "step": 805, + "valid_targets_mean": 6815.1, + "valid_targets_min": 1462 + }, + { + "epoch": 1.2857142857142856, + "grad_norm": 0.4440164128697404, + "learning_rate": 3.915751860494672e-05, + "loss": 0.1893, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16493773460388184, + "step": 810, + "valid_targets_mean": 5028.6, + "valid_targets_min": 397 + }, + { + "epoch": 1.2936507936507937, + "grad_norm": 0.43322694583144405, + "learning_rate": 3.913463721395051e-05, + "loss": 0.2092, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1892578899860382, + "step": 815, + "valid_targets_mean": 4793.7, + "valid_targets_min": 343 + }, + { + "epoch": 1.3015873015873016, + "grad_norm": 0.4113857371999088, + "learning_rate": 3.911145611572282e-05, + "loss": 0.2014, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22850877046585083, + "step": 820, + "valid_targets_mean": 6205.9, + "valid_targets_min": 3119 + }, + { + "epoch": 1.3095238095238095, + "grad_norm": 0.4024454753442706, + "learning_rate": 3.908797567335089e-05, + "loss": 0.2043, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1995319277048111, + "step": 825, + "valid_targets_mean": 6545.4, + "valid_targets_min": 2059 + }, + { + "epoch": 1.3174603174603174, + "grad_norm": 0.4766793634475361, + "learning_rate": 3.906419625461062e-05, + "loss": 0.1979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21162061393260956, + "step": 830, + "valid_targets_mean": 5270.3, + "valid_targets_min": 294 + }, + { + "epoch": 1.3253968253968254, + "grad_norm": 0.43124302476576, + "learning_rate": 3.90401182319608e-05, + "loss": 0.2112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2024182230234146, + "step": 835, + "valid_targets_mean": 5587.5, + "valid_targets_min": 946 + }, + { + "epoch": 1.3333333333333333, + "grad_norm": 0.37620838473431895, + "learning_rate": 3.9015741982537265e-05, + "loss": 0.1894, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16263701021671295, + "step": 840, + "valid_targets_mean": 6486.5, + "valid_targets_min": 777 + }, + { + "epoch": 1.3412698412698414, + "grad_norm": 0.4227366879146679, + "learning_rate": 3.899106788814701e-05, + "loss": 0.2324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19696393609046936, + "step": 845, + "valid_targets_mean": 5981.3, + "valid_targets_min": 1252 + }, + { + "epoch": 1.3492063492063493, + "grad_norm": 0.4396285787879683, + "learning_rate": 3.896609633526219e-05, + "loss": 0.213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21186983585357666, + "step": 850, + "valid_targets_mean": 5076.4, + "valid_targets_min": 525 + }, + { + "epoch": 1.3571428571428572, + "grad_norm": 0.4809038294139755, + "learning_rate": 3.894082771501407e-05, + "loss": 0.2076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.191837877035141, + "step": 855, + "valid_targets_mean": 5344.6, + "valid_targets_min": 3615 + }, + { + "epoch": 1.3650793650793651, + "grad_norm": 0.48473797503352833, + "learning_rate": 3.891526242318692e-05, + "loss": 0.1953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16904303431510925, + "step": 860, + "valid_targets_mean": 4677.6, + "valid_targets_min": 281 + }, + { + "epoch": 1.373015873015873, + "grad_norm": 0.3848141194705589, + "learning_rate": 3.8889400860211785e-05, + "loss": 0.2045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19609695672988892, + "step": 865, + "valid_targets_mean": 5797.4, + "valid_targets_min": 577 + }, + { + "epoch": 1.380952380952381, + "grad_norm": 0.43935568195512203, + "learning_rate": 3.886324343116023e-05, + "loss": 0.2004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21925979852676392, + "step": 870, + "valid_targets_mean": 5510.4, + "valid_targets_min": 2795 + }, + { + "epoch": 1.3888888888888888, + "grad_norm": 0.3787284592232413, + "learning_rate": 3.883679054573799e-05, + "loss": 0.1925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17324265837669373, + "step": 875, + "valid_targets_mean": 6371.8, + "valid_targets_min": 1851 + }, + { + "epoch": 1.3968253968253967, + "grad_norm": 0.48231311291457907, + "learning_rate": 3.881004261827856e-05, + "loss": 0.2057, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22266656160354614, + "step": 880, + "valid_targets_mean": 5407.8, + "valid_targets_min": 247 + }, + { + "epoch": 1.4047619047619047, + "grad_norm": 0.39562200883867676, + "learning_rate": 3.878300006773669e-05, + "loss": 0.1996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19309642910957336, + "step": 885, + "valid_targets_mean": 6324.1, + "valid_targets_min": 3843 + }, + { + "epoch": 1.4126984126984126, + "grad_norm": 0.4536286972970636, + "learning_rate": 3.875566331768184e-05, + "loss": 0.1989, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.205659881234169, + "step": 890, + "valid_targets_mean": 4451.7, + "valid_targets_min": 766 + }, + { + "epoch": 1.4206349206349207, + "grad_norm": 0.40917484623660716, + "learning_rate": 3.872803279629155e-05, + "loss": 0.1922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21330603957176208, + "step": 895, + "valid_targets_mean": 5915.2, + "valid_targets_min": 2792 + }, + { + "epoch": 1.4285714285714286, + "grad_norm": 0.44208512274029477, + "learning_rate": 3.8700108936344705e-05, + "loss": 0.1981, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21972954273223877, + "step": 900, + "valid_targets_mean": 4555.5, + "valid_targets_min": 299 + }, + { + "epoch": 1.4365079365079365, + "grad_norm": 0.44093255819884136, + "learning_rate": 3.867189217521477e-05, + "loss": 0.2056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20141087472438812, + "step": 905, + "valid_targets_mean": 5635.6, + "valid_targets_min": 2966 + }, + { + "epoch": 1.4444444444444444, + "grad_norm": 0.4623153556330511, + "learning_rate": 3.864338295486297e-05, + "loss": 0.2042, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21589690446853638, + "step": 910, + "valid_targets_mean": 4678.0, + "valid_targets_min": 514 + }, + { + "epoch": 1.4523809523809523, + "grad_norm": 0.41249982388927653, + "learning_rate": 3.8614581721831316e-05, + "loss": 0.2027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2026449292898178, + "step": 915, + "valid_targets_mean": 5216.7, + "valid_targets_min": 591 + }, + { + "epoch": 1.4603174603174602, + "grad_norm": 0.4104480770890901, + "learning_rate": 3.858548892723563e-05, + "loss": 0.1958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1731051206588745, + "step": 920, + "valid_targets_mean": 5339.4, + "valid_targets_min": 736 + }, + { + "epoch": 1.4682539682539684, + "grad_norm": 0.41895380180198477, + "learning_rate": 3.855610502675851e-05, + "loss": 0.1987, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2031707614660263, + "step": 925, + "valid_targets_mean": 5263.9, + "valid_targets_min": 1601 + }, + { + "epoch": 1.4761904761904763, + "grad_norm": 0.4261994122248901, + "learning_rate": 3.852643048064215e-05, + "loss": 0.2095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2084016501903534, + "step": 930, + "valid_targets_mean": 5398.7, + "valid_targets_min": 366 + }, + { + "epoch": 1.4841269841269842, + "grad_norm": 0.44461914330875957, + "learning_rate": 3.8496465753681145e-05, + "loss": 0.1898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.203968346118927, + "step": 935, + "valid_targets_mean": 5705.4, + "valid_targets_min": 293 + }, + { + "epoch": 1.492063492063492, + "grad_norm": 0.3619697764513735, + "learning_rate": 3.846621131521522e-05, + "loss": 0.1942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16397273540496826, + "step": 940, + "valid_targets_mean": 5586.3, + "valid_targets_min": 603 + }, + { + "epoch": 1.5, + "grad_norm": 0.47927964908668164, + "learning_rate": 3.843566763912187e-05, + "loss": 0.1938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19876304268836975, + "step": 945, + "valid_targets_mean": 5481.6, + "valid_targets_min": 2470 + }, + { + "epoch": 1.507936507936508, + "grad_norm": 0.43177345256816557, + "learning_rate": 3.840483520380896e-05, + "loss": 0.1979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19520597159862518, + "step": 950, + "valid_targets_mean": 5171.9, + "valid_targets_min": 578 + }, + { + "epoch": 1.5158730158730158, + "grad_norm": 0.45814725913527155, + "learning_rate": 3.837371449220717e-05, + "loss": 0.1975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.191867396235466, + "step": 955, + "valid_targets_mean": 6292.2, + "valid_targets_min": 3359 + }, + { + "epoch": 1.5238095238095237, + "grad_norm": 0.3660046705130465, + "learning_rate": 3.834230599176251e-05, + "loss": 0.1888, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16699504852294922, + "step": 960, + "valid_targets_mean": 6213.4, + "valid_targets_min": 2488 + }, + { + "epoch": 1.5317460317460316, + "grad_norm": 0.3890844674038831, + "learning_rate": 3.831061019442864e-05, + "loss": 0.2056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20834946632385254, + "step": 965, + "valid_targets_mean": 5682.8, + "valid_targets_min": 855 + }, + { + "epoch": 1.5396825396825395, + "grad_norm": 0.442290989827857, + "learning_rate": 3.827862759665916e-05, + "loss": 0.2217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2266271859407425, + "step": 970, + "valid_targets_mean": 5248.5, + "valid_targets_min": 1995 + }, + { + "epoch": 1.5476190476190477, + "grad_norm": 0.4156925522612072, + "learning_rate": 3.8246358699399853e-05, + "loss": 0.1998, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16675469279289246, + "step": 975, + "valid_targets_mean": 4952.9, + "valid_targets_min": 277 + }, + { + "epoch": 1.5555555555555556, + "grad_norm": 0.37604931667074504, + "learning_rate": 3.8213804008080824e-05, + "loss": 0.1981, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18879935145378113, + "step": 980, + "valid_targets_mean": 6674.0, + "valid_targets_min": 3077 + }, + { + "epoch": 1.5634920634920635, + "grad_norm": 0.4287194995251398, + "learning_rate": 3.818096403260862e-05, + "loss": 0.1951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20438791811466217, + "step": 985, + "valid_targets_mean": 5784.4, + "valid_targets_min": 1565 + }, + { + "epoch": 1.5714285714285714, + "grad_norm": 0.44022259559851695, + "learning_rate": 3.8147839287358185e-05, + "loss": 0.2104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21569909155368805, + "step": 990, + "valid_targets_mean": 5262.6, + "valid_targets_min": 1953 + }, + { + "epoch": 1.5793650793650795, + "grad_norm": 0.466156003317299, + "learning_rate": 3.8114430291164836e-05, + "loss": 0.1833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1877652108669281, + "step": 995, + "valid_targets_mean": 4195.9, + "valid_targets_min": 361 + }, + { + "epoch": 1.5873015873015874, + "grad_norm": 0.38334955845745405, + "learning_rate": 3.808073756731615e-05, + "loss": 0.1842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1762232482433319, + "step": 1000, + "valid_targets_mean": 5545.6, + "valid_targets_min": 949 + }, + { + "epoch": 1.5952380952380953, + "grad_norm": 0.5439994931547266, + "learning_rate": 3.8046761643543734e-05, + "loss": 0.207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1913871467113495, + "step": 1005, + "valid_targets_mean": 4491.5, + "valid_targets_min": 369 + }, + { + "epoch": 1.6031746031746033, + "grad_norm": 0.40075365186274436, + "learning_rate": 3.8012503052014996e-05, + "loss": 0.2202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1946803331375122, + "step": 1010, + "valid_targets_mean": 5425.6, + "valid_targets_min": 764 + }, + { + "epoch": 1.6111111111111112, + "grad_norm": 0.3877028365107908, + "learning_rate": 3.797796232932476e-05, + "loss": 0.203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21146497130393982, + "step": 1015, + "valid_targets_mean": 6453.6, + "valid_targets_min": 2839 + }, + { + "epoch": 1.619047619047619, + "grad_norm": 0.3668527782681026, + "learning_rate": 3.794314001648692e-05, + "loss": 0.2022, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18072551488876343, + "step": 1020, + "valid_targets_mean": 6010.3, + "valid_targets_min": 2661 + }, + { + "epoch": 1.626984126984127, + "grad_norm": 0.44781816078627307, + "learning_rate": 3.7908036658925926e-05, + "loss": 0.1919, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2001311182975769, + "step": 1025, + "valid_targets_mean": 5515.9, + "valid_targets_min": 693 + }, + { + "epoch": 1.6349206349206349, + "grad_norm": 0.40397196550171016, + "learning_rate": 3.787265280646825e-05, + "loss": 0.1921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19972187280654907, + "step": 1030, + "valid_targets_mean": 5461.7, + "valid_targets_min": 289 + }, + { + "epoch": 1.6428571428571428, + "grad_norm": 0.4356586536795959, + "learning_rate": 3.7836989013333776e-05, + "loss": 0.2041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1890064775943756, + "step": 1035, + "valid_targets_mean": 5751.4, + "valid_targets_min": 211 + }, + { + "epoch": 1.6507936507936507, + "grad_norm": 0.44136645672562447, + "learning_rate": 3.780104583812712e-05, + "loss": 0.1952, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1881527304649353, + "step": 1040, + "valid_targets_mean": 5342.1, + "valid_targets_min": 827 + }, + { + "epoch": 1.6587301587301586, + "grad_norm": 0.4490662637016875, + "learning_rate": 3.7764823843828883e-05, + "loss": 0.1933, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20181840658187866, + "step": 1045, + "valid_targets_mean": 4998.9, + "valid_targets_min": 1099 + }, + { + "epoch": 1.6666666666666665, + "grad_norm": 0.414814689156328, + "learning_rate": 3.7728323597786834e-05, + "loss": 0.193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2113458514213562, + "step": 1050, + "valid_targets_mean": 5317.9, + "valid_targets_min": 912 + }, + { + "epoch": 1.6746031746031746, + "grad_norm": 0.4002735439554692, + "learning_rate": 3.7691545671707007e-05, + "loss": 0.2019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19339720904827118, + "step": 1055, + "valid_targets_mean": 5467.6, + "valid_targets_min": 304 + }, + { + "epoch": 1.6825396825396826, + "grad_norm": 0.34935950618209505, + "learning_rate": 3.765449064164477e-05, + "loss": 0.1992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16387102007865906, + "step": 1060, + "valid_targets_mean": 5961.0, + "valid_targets_min": 1397 + }, + { + "epoch": 1.6904761904761905, + "grad_norm": 0.4617875758265747, + "learning_rate": 3.7617159087995784e-05, + "loss": 0.1952, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20451588928699493, + "step": 1065, + "valid_targets_mean": 5329.4, + "valid_targets_min": 1754 + }, + { + "epoch": 1.6984126984126984, + "grad_norm": 0.43000065083291655, + "learning_rate": 3.757955159548693e-05, + "loss": 0.2123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19835901260375977, + "step": 1070, + "valid_targets_mean": 5593.4, + "valid_targets_min": 1554 + }, + { + "epoch": 1.7063492063492065, + "grad_norm": 0.3606371311330236, + "learning_rate": 3.754166875316713e-05, + "loss": 0.1868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18690404295921326, + "step": 1075, + "valid_targets_mean": 6369.9, + "valid_targets_min": 2694 + }, + { + "epoch": 1.7142857142857144, + "grad_norm": 0.39285827605092777, + "learning_rate": 3.750351115439812e-05, + "loss": 0.1994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20067650079727173, + "step": 1080, + "valid_targets_mean": 5648.6, + "valid_targets_min": 650 + }, + { + "epoch": 1.7222222222222223, + "grad_norm": 0.38570908603943815, + "learning_rate": 3.746507939684519e-05, + "loss": 0.1901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19070516526699066, + "step": 1085, + "valid_targets_mean": 6492.1, + "valid_targets_min": 3263 + }, + { + "epoch": 1.7301587301587302, + "grad_norm": 0.39910170101731635, + "learning_rate": 3.742637408246779e-05, + "loss": 0.2, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21158373355865479, + "step": 1090, + "valid_targets_mean": 6281.1, + "valid_targets_min": 1277 + }, + { + "epoch": 1.7380952380952381, + "grad_norm": 0.49874739628178993, + "learning_rate": 3.73873958175101e-05, + "loss": 0.2025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20216044783592224, + "step": 1095, + "valid_targets_mean": 4570.4, + "valid_targets_min": 797 + }, + { + "epoch": 1.746031746031746, + "grad_norm": 0.47674802821687573, + "learning_rate": 3.734814521249156e-05, + "loss": 0.2124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21278969943523407, + "step": 1100, + "valid_targets_mean": 6688.0, + "valid_targets_min": 4913 + }, + { + "epoch": 1.753968253968254, + "grad_norm": 0.4208345610037382, + "learning_rate": 3.7308622882197294e-05, + "loss": 0.2021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20342180132865906, + "step": 1105, + "valid_targets_mean": 4974.2, + "valid_targets_min": 1373 + }, + { + "epoch": 1.7619047619047619, + "grad_norm": 0.5036978972360446, + "learning_rate": 3.7268829445668456e-05, + "loss": 0.1913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19987335801124573, + "step": 1110, + "valid_targets_mean": 4656.1, + "valid_targets_min": 692 + }, + { + "epoch": 1.7698412698412698, + "grad_norm": 0.4475822251477478, + "learning_rate": 3.722876552619257e-05, + "loss": 0.1953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1833523064851761, + "step": 1115, + "valid_targets_mean": 5454.4, + "valid_targets_min": 1320 + }, + { + "epoch": 1.7777777777777777, + "grad_norm": 0.42301407783978096, + "learning_rate": 3.718843175129378e-05, + "loss": 0.1906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1914646029472351, + "step": 1120, + "valid_targets_mean": 6252.5, + "valid_targets_min": 899 + }, + { + "epoch": 1.7857142857142856, + "grad_norm": 0.3667105929189479, + "learning_rate": 3.7147828752722944e-05, + "loss": 0.1872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18096204102039337, + "step": 1125, + "valid_targets_mean": 5728.2, + "valid_targets_min": 1408 + }, + { + "epoch": 1.7936507936507935, + "grad_norm": 0.3988857634933404, + "learning_rate": 3.7106957166447834e-05, + "loss": 0.187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20091086626052856, + "step": 1130, + "valid_targets_mean": 6323.2, + "valid_targets_min": 2797 + }, + { + "epoch": 1.8015873015873016, + "grad_norm": 0.4353965199064458, + "learning_rate": 3.7065817632643115e-05, + "loss": 0.1983, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2031416893005371, + "step": 1135, + "valid_targets_mean": 5034.0, + "valid_targets_min": 1545 + }, + { + "epoch": 1.8095238095238095, + "grad_norm": 0.3835363845414605, + "learning_rate": 3.7024410795680326e-05, + "loss": 0.1964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20650315284729004, + "step": 1140, + "valid_targets_mean": 5709.4, + "valid_targets_min": 2868 + }, + { + "epoch": 1.8174603174603174, + "grad_norm": 0.6017933902148771, + "learning_rate": 3.698273730411782e-05, + "loss": 0.1895, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21874158084392548, + "step": 1145, + "valid_targets_mean": 4664.8, + "valid_targets_min": 1051 + }, + { + "epoch": 1.8253968253968254, + "grad_norm": 0.5010674614611315, + "learning_rate": 3.694079781069053e-05, + "loss": 0.1978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22206932306289673, + "step": 1150, + "valid_targets_mean": 5182.3, + "valid_targets_min": 1754 + }, + { + "epoch": 1.8333333333333335, + "grad_norm": 0.3923636627441356, + "learning_rate": 3.6898592972299875e-05, + "loss": 0.2019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17895236611366272, + "step": 1155, + "valid_targets_mean": 5307.1, + "valid_targets_min": 299 + }, + { + "epoch": 1.8412698412698414, + "grad_norm": 0.46097320064253466, + "learning_rate": 3.6856123450003306e-05, + "loss": 0.1934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17629393935203552, + "step": 1160, + "valid_targets_mean": 5396.4, + "valid_targets_min": 1717 + }, + { + "epoch": 1.8492063492063493, + "grad_norm": 0.4118263198386797, + "learning_rate": 3.68133899090041e-05, + "loss": 0.1974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21603277325630188, + "step": 1165, + "valid_targets_mean": 5743.1, + "valid_targets_min": 1944 + }, + { + "epoch": 1.8571428571428572, + "grad_norm": 0.40887713694857647, + "learning_rate": 3.677039301864085e-05, + "loss": 0.1862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21704469621181488, + "step": 1170, + "valid_targets_mean": 5467.9, + "valid_targets_min": 1922 + }, + { + "epoch": 1.8650793650793651, + "grad_norm": 0.3934682878408651, + "learning_rate": 3.672713345237701e-05, + "loss": 0.199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19257432222366333, + "step": 1175, + "valid_targets_mean": 5539.5, + "valid_targets_min": 574 + }, + { + "epoch": 1.873015873015873, + "grad_norm": 0.40297241825152724, + "learning_rate": 3.6683611887790356e-05, + "loss": 0.1949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22532138228416443, + "step": 1180, + "valid_targets_mean": 5603.6, + "valid_targets_min": 839 + }, + { + "epoch": 1.880952380952381, + "grad_norm": 0.4525568905524082, + "learning_rate": 3.663982900656236e-05, + "loss": 0.1941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21516132354736328, + "step": 1185, + "valid_targets_mean": 4871.1, + "valid_targets_min": 852 + }, + { + "epoch": 1.8888888888888888, + "grad_norm": 0.3826095420283405, + "learning_rate": 3.6595785494467516e-05, + "loss": 0.2033, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2126571089029312, + "step": 1190, + "valid_targets_mean": 6149.8, + "valid_targets_min": 322 + }, + { + "epoch": 1.8968253968253967, + "grad_norm": 0.49491876878445984, + "learning_rate": 3.655148204136259e-05, + "loss": 0.2157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1980828046798706, + "step": 1195, + "valid_targets_mean": 4812.1, + "valid_targets_min": 355 + }, + { + "epoch": 1.9047619047619047, + "grad_norm": 0.3844425255192722, + "learning_rate": 3.650691934117584e-05, + "loss": 0.1961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17985425889492035, + "step": 1200, + "valid_targets_mean": 5473.3, + "valid_targets_min": 2775 + }, + { + "epoch": 1.9126984126984126, + "grad_norm": 0.4025013315745311, + "learning_rate": 3.646209809189611e-05, + "loss": 0.1911, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2079595923423767, + "step": 1205, + "valid_targets_mean": 5829.4, + "valid_targets_min": 2090 + }, + { + "epoch": 1.9206349206349205, + "grad_norm": 0.3718395023938511, + "learning_rate": 3.641701899556192e-05, + "loss": 0.1909, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20109693706035614, + "step": 1210, + "valid_targets_mean": 5616.2, + "valid_targets_min": 447 + }, + { + "epoch": 1.9285714285714286, + "grad_norm": 0.41192663980117594, + "learning_rate": 3.63716827582505e-05, + "loss": 0.2066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19556891918182373, + "step": 1215, + "valid_targets_mean": 5405.4, + "valid_targets_min": 260 + }, + { + "epoch": 1.9365079365079365, + "grad_norm": 0.4061502796105446, + "learning_rate": 3.632609009006665e-05, + "loss": 0.1876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1885574758052826, + "step": 1220, + "valid_targets_mean": 4830.4, + "valid_targets_min": 1806 + }, + { + "epoch": 1.9444444444444444, + "grad_norm": 0.394449808870156, + "learning_rate": 3.62802417051317e-05, + "loss": 0.1974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1800915151834488, + "step": 1225, + "valid_targets_mean": 5700.7, + "valid_targets_min": 403 + }, + { + "epoch": 1.9523809523809523, + "grad_norm": 0.4440079869221486, + "learning_rate": 3.6234138321572274e-05, + "loss": 0.1999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17726945877075195, + "step": 1230, + "valid_targets_mean": 5161.5, + "valid_targets_min": 1498 + }, + { + "epoch": 1.9603174603174605, + "grad_norm": 0.4620529228104198, + "learning_rate": 3.6187780661509074e-05, + "loss": 0.2106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22657737135887146, + "step": 1235, + "valid_targets_mean": 5007.8, + "valid_targets_min": 1843 + }, + { + "epoch": 1.9682539682539684, + "grad_norm": 0.4536224923690941, + "learning_rate": 3.6141169451045526e-05, + "loss": 0.2038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1943274736404419, + "step": 1240, + "valid_targets_mean": 4127.1, + "valid_targets_min": 391 + }, + { + "epoch": 1.9761904761904763, + "grad_norm": 0.46913089325037155, + "learning_rate": 3.609430542025646e-05, + "loss": 0.1917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17612981796264648, + "step": 1245, + "valid_targets_mean": 5872.5, + "valid_targets_min": 1386 + }, + { + "epoch": 1.9841269841269842, + "grad_norm": 0.40006665679686376, + "learning_rate": 3.604718930317664e-05, + "loss": 0.1935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21723848581314087, + "step": 1250, + "valid_targets_mean": 5627.9, + "valid_targets_min": 1877 + }, + { + "epoch": 1.992063492063492, + "grad_norm": 0.4171450102000576, + "learning_rate": 3.5999821837789275e-05, + "loss": 0.1974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17687958478927612, + "step": 1255, + "valid_targets_mean": 4820.7, + "valid_targets_min": 275 + }, + { + "epoch": 2.0, + "grad_norm": 0.39376114464436146, + "learning_rate": 3.595220376601447e-05, + "loss": 0.1858, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16747349500656128, + "step": 1260, + "valid_targets_mean": 5956.8, + "valid_targets_min": 510 + }, + { + "epoch": 2.007936507936508, + "grad_norm": 0.42367770753786105, + "learning_rate": 3.590433583369758e-05, + "loss": 0.1755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.164157897233963, + "step": 1265, + "valid_targets_mean": 6732.4, + "valid_targets_min": 3077 + }, + { + "epoch": 2.015873015873016, + "grad_norm": 0.43491398653549607, + "learning_rate": 3.5856218790597554e-05, + "loss": 0.1756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16858386993408203, + "step": 1270, + "valid_targets_mean": 4635.7, + "valid_targets_min": 267 + }, + { + "epoch": 2.0238095238095237, + "grad_norm": 0.4336346848049278, + "learning_rate": 3.580785339037519e-05, + "loss": 0.1762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1782153844833374, + "step": 1275, + "valid_targets_mean": 5509.9, + "valid_targets_min": 2820 + }, + { + "epoch": 2.0317460317460316, + "grad_norm": 0.4219922314179232, + "learning_rate": 3.57592403905813e-05, + "loss": 0.1801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17715319991111755, + "step": 1280, + "valid_targets_mean": 5722.5, + "valid_targets_min": 269 + }, + { + "epoch": 2.0396825396825395, + "grad_norm": 0.3940209379170676, + "learning_rate": 3.571038055264489e-05, + "loss": 0.188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16919057071208954, + "step": 1285, + "valid_targets_mean": 5806.7, + "valid_targets_min": 2137 + }, + { + "epoch": 2.0476190476190474, + "grad_norm": 0.9513050625567275, + "learning_rate": 3.566127464186119e-05, + "loss": 0.1672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17070958018302917, + "step": 1290, + "valid_targets_mean": 6109.1, + "valid_targets_min": 2815 + }, + { + "epoch": 2.0555555555555554, + "grad_norm": 0.39178871800874376, + "learning_rate": 3.56119234273797e-05, + "loss": 0.1696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15980932116508484, + "step": 1295, + "valid_targets_mean": 6312.2, + "valid_targets_min": 2200 + }, + { + "epoch": 2.0634920634920633, + "grad_norm": 0.4545939454742837, + "learning_rate": 3.5562327682192134e-05, + "loss": 0.184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17266622185707092, + "step": 1300, + "valid_targets_mean": 5758.9, + "valid_targets_min": 322 + }, + { + "epoch": 2.0714285714285716, + "grad_norm": 0.40752366743563423, + "learning_rate": 3.5512488183120286e-05, + "loss": 0.1772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18903039395809174, + "step": 1305, + "valid_targets_mean": 6031.5, + "valid_targets_min": 552 + }, + { + "epoch": 2.0793650793650795, + "grad_norm": 0.43154212786267104, + "learning_rate": 3.54624057108039e-05, + "loss": 0.1707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1685219407081604, + "step": 1310, + "valid_targets_mean": 4359.1, + "valid_targets_min": 492 + }, + { + "epoch": 2.0873015873015874, + "grad_norm": 0.41368733303715804, + "learning_rate": 3.5412081049688444e-05, + "loss": 0.1761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1651342809200287, + "step": 1315, + "valid_targets_mean": 5127.6, + "valid_targets_min": 1605 + }, + { + "epoch": 2.0952380952380953, + "grad_norm": 0.38589454302426857, + "learning_rate": 3.5361514988012774e-05, + "loss": 0.1771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1480841338634491, + "step": 1320, + "valid_targets_mean": 5874.6, + "valid_targets_min": 1503 + }, + { + "epoch": 2.1031746031746033, + "grad_norm": 0.43713259076728145, + "learning_rate": 3.5310708317796844e-05, + "loss": 0.1794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16304585337638855, + "step": 1325, + "valid_targets_mean": 5606.6, + "valid_targets_min": 2620 + }, + { + "epoch": 2.111111111111111, + "grad_norm": 0.5149351729670725, + "learning_rate": 3.5259661834829266e-05, + "loss": 0.1915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17917974293231964, + "step": 1330, + "valid_targets_mean": 6228.2, + "valid_targets_min": 1503 + }, + { + "epoch": 2.119047619047619, + "grad_norm": 0.3861389835108437, + "learning_rate": 3.5208376338654866e-05, + "loss": 0.1766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16448023915290833, + "step": 1335, + "valid_targets_mean": 6666.1, + "valid_targets_min": 708 + }, + { + "epoch": 2.126984126984127, + "grad_norm": 0.38207111066871835, + "learning_rate": 3.515685263256214e-05, + "loss": 0.1881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17809657752513885, + "step": 1340, + "valid_targets_mean": 6127.4, + "valid_targets_min": 1544 + }, + { + "epoch": 2.134920634920635, + "grad_norm": 0.4127592358303544, + "learning_rate": 3.51050915235707e-05, + "loss": 0.1683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18220895528793335, + "step": 1345, + "valid_targets_mean": 5229.9, + "valid_targets_min": 864 + }, + { + "epoch": 2.142857142857143, + "grad_norm": 0.42540313592591483, + "learning_rate": 3.5053093822418596e-05, + "loss": 0.1862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1991414576768875, + "step": 1350, + "valid_targets_mean": 5462.2, + "valid_targets_min": 2257 + }, + { + "epoch": 2.1507936507936507, + "grad_norm": 0.39687138441854536, + "learning_rate": 3.500086034354966e-05, + "loss": 0.1874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20256337523460388, + "step": 1355, + "valid_targets_mean": 5684.8, + "valid_targets_min": 2415 + }, + { + "epoch": 2.1587301587301586, + "grad_norm": 0.44932267888477845, + "learning_rate": 3.494839190510071e-05, + "loss": 0.1882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17283421754837036, + "step": 1360, + "valid_targets_mean": 4960.4, + "valid_targets_min": 2345 + }, + { + "epoch": 2.1666666666666665, + "grad_norm": 0.49624084160009224, + "learning_rate": 3.489568932888877e-05, + "loss": 0.1798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1903117299079895, + "step": 1365, + "valid_targets_mean": 5084.8, + "valid_targets_min": 301 + }, + { + "epoch": 2.1746031746031744, + "grad_norm": 0.42424536754472875, + "learning_rate": 3.484275344039815e-05, + "loss": 0.1773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1761750876903534, + "step": 1370, + "valid_targets_mean": 4764.8, + "valid_targets_min": 310 + }, + { + "epoch": 2.1825396825396823, + "grad_norm": 0.3930655272562859, + "learning_rate": 3.478958506876759e-05, + "loss": 0.1769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16021452844142914, + "step": 1375, + "valid_targets_mean": 5342.4, + "valid_targets_min": 2886 + }, + { + "epoch": 2.1904761904761907, + "grad_norm": 0.5253004636054942, + "learning_rate": 3.47361850467772e-05, + "loss": 0.1835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1926204413175583, + "step": 1380, + "valid_targets_mean": 5126.9, + "valid_targets_min": 2220 + }, + { + "epoch": 2.1984126984126986, + "grad_norm": 0.3913488618062026, + "learning_rate": 3.468255421083546e-05, + "loss": 0.1869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2079114466905594, + "step": 1385, + "valid_targets_mean": 6658.1, + "valid_targets_min": 906 + }, + { + "epoch": 2.2063492063492065, + "grad_norm": 0.4343501732005746, + "learning_rate": 3.46286934009661e-05, + "loss": 0.1826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1819835603237152, + "step": 1390, + "valid_targets_mean": 4903.1, + "valid_targets_min": 1946 + }, + { + "epoch": 2.2142857142857144, + "grad_norm": 0.4642383235525826, + "learning_rate": 3.457460346079495e-05, + "loss": 0.174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1960304081439972, + "step": 1395, + "valid_targets_mean": 4859.3, + "valid_targets_min": 1756 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.39883822754099246, + "learning_rate": 3.452028523753673e-05, + "loss": 0.1821, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1751224845647812, + "step": 1400, + "valid_targets_mean": 5698.9, + "valid_targets_min": 1531 + }, + { + "epoch": 2.2301587301587302, + "grad_norm": 0.37010389401431576, + "learning_rate": 3.446573958198176e-05, + "loss": 0.1812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18461816012859344, + "step": 1405, + "valid_targets_mean": 6230.6, + "valid_targets_min": 1700 + }, + { + "epoch": 2.238095238095238, + "grad_norm": 0.6181478614629903, + "learning_rate": 3.4410967348482666e-05, + "loss": 0.1769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14911070466041565, + "step": 1410, + "valid_targets_mean": 4602.1, + "valid_targets_min": 1798 + }, + { + "epoch": 2.246031746031746, + "grad_norm": 0.43108429750485655, + "learning_rate": 3.435596939494098e-05, + "loss": 0.1786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1552506387233734, + "step": 1415, + "valid_targets_mean": 5343.7, + "valid_targets_min": 860 + }, + { + "epoch": 2.253968253968254, + "grad_norm": 0.3655618940959395, + "learning_rate": 3.430074658279369e-05, + "loss": 0.1686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16736620664596558, + "step": 1420, + "valid_targets_mean": 6760.8, + "valid_targets_min": 4142 + }, + { + "epoch": 2.261904761904762, + "grad_norm": 0.43470898698901705, + "learning_rate": 3.424529977699977e-05, + "loss": 0.1747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18593010306358337, + "step": 1425, + "valid_targets_mean": 5456.4, + "valid_targets_min": 938 + }, + { + "epoch": 2.2698412698412698, + "grad_norm": 0.4012757385555316, + "learning_rate": 3.418962984602661e-05, + "loss": 0.1893, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2028789222240448, + "step": 1430, + "valid_targets_mean": 6217.8, + "valid_targets_min": 2538 + }, + { + "epoch": 2.2777777777777777, + "grad_norm": 0.422277687599635, + "learning_rate": 3.413373766183646e-05, + "loss": 0.1766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17681989073753357, + "step": 1435, + "valid_targets_mean": 5242.7, + "valid_targets_min": 1133 + }, + { + "epoch": 2.2857142857142856, + "grad_norm": 0.4610732403739008, + "learning_rate": 3.40776240998727e-05, + "loss": 0.1881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19924741983413696, + "step": 1440, + "valid_targets_mean": 5244.7, + "valid_targets_min": 878 + }, + { + "epoch": 2.2936507936507935, + "grad_norm": 0.4230850293753159, + "learning_rate": 3.4021290039046184e-05, + "loss": 0.1939, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18189997971057892, + "step": 1445, + "valid_targets_mean": 5056.2, + "valid_targets_min": 287 + }, + { + "epoch": 2.3015873015873014, + "grad_norm": 0.4282446015574262, + "learning_rate": 3.396473636172146e-05, + "loss": 0.1823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1614503115415573, + "step": 1450, + "valid_targets_mean": 5646.3, + "valid_targets_min": 1748 + }, + { + "epoch": 2.3095238095238093, + "grad_norm": 0.35393446023512337, + "learning_rate": 3.390796395370294e-05, + "loss": 0.1662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1533820927143097, + "step": 1455, + "valid_targets_mean": 6412.6, + "valid_targets_min": 930 + }, + { + "epoch": 2.317460317460317, + "grad_norm": 0.4006705141341369, + "learning_rate": 3.385097370422102e-05, + "loss": 0.1705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17715230584144592, + "step": 1460, + "valid_targets_mean": 5679.0, + "valid_targets_min": 1403 + }, + { + "epoch": 2.3253968253968256, + "grad_norm": 0.41667317090980344, + "learning_rate": 3.3793766505918185e-05, + "loss": 0.1827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18403995037078857, + "step": 1465, + "valid_targets_mean": 5691.4, + "valid_targets_min": 1804 + }, + { + "epoch": 2.3333333333333335, + "grad_norm": 0.4438049911035154, + "learning_rate": 3.3736343254834994e-05, + "loss": 0.1741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16705909371376038, + "step": 1470, + "valid_targets_mean": 5163.1, + "valid_targets_min": 514 + }, + { + "epoch": 2.3412698412698414, + "grad_norm": 0.41724254225439533, + "learning_rate": 3.3678704850396045e-05, + "loss": 0.1803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20384517312049866, + "step": 1475, + "valid_targets_mean": 5862.2, + "valid_targets_min": 328 + }, + { + "epoch": 2.3492063492063493, + "grad_norm": 0.4965895506653012, + "learning_rate": 3.362085219539592e-05, + "loss": 0.1903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19839468598365784, + "step": 1480, + "valid_targets_mean": 5439.9, + "valid_targets_min": 1170 + }, + { + "epoch": 2.357142857142857, + "grad_norm": 0.38336740154664095, + "learning_rate": 3.3562786195985025e-05, + "loss": 0.1821, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.190034419298172, + "step": 1485, + "valid_targets_mean": 6053.7, + "valid_targets_min": 3073 + }, + { + "epoch": 2.365079365079365, + "grad_norm": 0.42799053268182136, + "learning_rate": 3.350450776165535e-05, + "loss": 0.1843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17199914157390594, + "step": 1490, + "valid_targets_mean": 5109.0, + "valid_targets_min": 638 + }, + { + "epoch": 2.373015873015873, + "grad_norm": 0.49042106827448145, + "learning_rate": 3.344601780522634e-05, + "loss": 0.1783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18558023869991302, + "step": 1495, + "valid_targets_mean": 4502.5, + "valid_targets_min": 1358 + }, + { + "epoch": 2.380952380952381, + "grad_norm": 0.452178036330889, + "learning_rate": 3.3387317242830466e-05, + "loss": 0.1873, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17586269974708557, + "step": 1500, + "valid_targets_mean": 5095.8, + "valid_targets_min": 271 + }, + { + "epoch": 2.388888888888889, + "grad_norm": 0.4167028985093456, + "learning_rate": 3.332840699389897e-05, + "loss": 0.1721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16293631494045258, + "step": 1505, + "valid_targets_mean": 5093.2, + "valid_targets_min": 1199 + }, + { + "epoch": 2.3968253968253967, + "grad_norm": 0.4134999974854722, + "learning_rate": 3.32692879811474e-05, + "loss": 0.178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16504386067390442, + "step": 1510, + "valid_targets_mean": 4788.2, + "valid_targets_min": 281 + }, + { + "epoch": 2.4047619047619047, + "grad_norm": 0.38629332618116435, + "learning_rate": 3.320996113056123e-05, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16470927000045776, + "step": 1515, + "valid_targets_mean": 5480.1, + "valid_targets_min": 1382 + }, + { + "epoch": 2.4126984126984126, + "grad_norm": 0.4517489878934338, + "learning_rate": 3.315042737138128e-05, + "loss": 0.1833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19131216406822205, + "step": 1520, + "valid_targets_mean": 5177.0, + "valid_targets_min": 1427 + }, + { + "epoch": 2.4206349206349205, + "grad_norm": 0.3912437196261749, + "learning_rate": 3.309068763608919e-05, + "loss": 0.1629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15866073966026306, + "step": 1525, + "valid_targets_mean": 4809.5, + "valid_targets_min": 304 + }, + { + "epoch": 2.4285714285714284, + "grad_norm": 0.4287081470079961, + "learning_rate": 3.303074286039285e-05, + "loss": 0.1818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18889503180980682, + "step": 1530, + "valid_targets_mean": 5050.8, + "valid_targets_min": 1050 + }, + { + "epoch": 2.4365079365079367, + "grad_norm": 0.5226823839069293, + "learning_rate": 3.2970593983211694e-05, + "loss": 0.1766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17266133427619934, + "step": 1535, + "valid_targets_mean": 5716.3, + "valid_targets_min": 2116 + }, + { + "epoch": 2.4444444444444446, + "grad_norm": 0.4142977777917709, + "learning_rate": 3.2910241946661993e-05, + "loss": 0.178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17832788825035095, + "step": 1540, + "valid_targets_mean": 5296.9, + "valid_targets_min": 600 + }, + { + "epoch": 2.4523809523809526, + "grad_norm": 0.4094380830442922, + "learning_rate": 3.2849687696042165e-05, + "loss": 0.1714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16501125693321228, + "step": 1545, + "valid_targets_mean": 5243.6, + "valid_targets_min": 337 + }, + { + "epoch": 2.4603174603174605, + "grad_norm": 0.5264844857969575, + "learning_rate": 3.2788932179817886e-05, + "loss": 0.1907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19089126586914062, + "step": 1550, + "valid_targets_mean": 4228.1, + "valid_targets_min": 574 + }, + { + "epoch": 2.4682539682539684, + "grad_norm": 0.3870078917146876, + "learning_rate": 3.2727976349607276e-05, + "loss": 0.1742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16617675125598907, + "step": 1555, + "valid_targets_mean": 5233.7, + "valid_targets_min": 283 + }, + { + "epoch": 2.4761904761904763, + "grad_norm": 0.37140129207143047, + "learning_rate": 3.266682116016599e-05, + "loss": 0.1785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15645205974578857, + "step": 1560, + "valid_targets_mean": 6038.5, + "valid_targets_min": 858 + }, + { + "epoch": 2.484126984126984, + "grad_norm": 0.3972434967156975, + "learning_rate": 3.260546756937227e-05, + "loss": 0.1742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1872669756412506, + "step": 1565, + "valid_targets_mean": 5819.3, + "valid_targets_min": 343 + }, + { + "epoch": 2.492063492063492, + "grad_norm": 0.41540248633079907, + "learning_rate": 3.254391653821192e-05, + "loss": 0.1736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1847609579563141, + "step": 1570, + "valid_targets_mean": 5872.5, + "valid_targets_min": 366 + }, + { + "epoch": 2.5, + "grad_norm": 0.4326289822516213, + "learning_rate": 3.248216903076328e-05, + "loss": 0.1833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20463158190250397, + "step": 1575, + "valid_targets_mean": 5289.8, + "valid_targets_min": 279 + }, + { + "epoch": 2.507936507936508, + "grad_norm": 0.5002103630542671, + "learning_rate": 3.24202260141821e-05, + "loss": 0.1849, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19607800245285034, + "step": 1580, + "valid_targets_mean": 4856.6, + "valid_targets_min": 377 + }, + { + "epoch": 2.515873015873016, + "grad_norm": 0.44088426161151917, + "learning_rate": 3.235808845868641e-05, + "loss": 0.1911, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17973065376281738, + "step": 1585, + "valid_targets_mean": 5105.2, + "valid_targets_min": 307 + }, + { + "epoch": 2.5238095238095237, + "grad_norm": 0.41586874944924196, + "learning_rate": 3.229575733754132e-05, + "loss": 0.1736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18156777322292328, + "step": 1590, + "valid_targets_mean": 5101.9, + "valid_targets_min": 594 + }, + { + "epoch": 2.5317460317460316, + "grad_norm": 0.3977154984604151, + "learning_rate": 3.2233233627043765e-05, + "loss": 0.1802, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19697993993759155, + "step": 1595, + "valid_targets_mean": 5949.2, + "valid_targets_min": 838 + }, + { + "epoch": 2.5396825396825395, + "grad_norm": 0.3893767013034315, + "learning_rate": 3.217051830650722e-05, + "loss": 0.1788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1648177206516266, + "step": 1600, + "valid_targets_mean": 5405.4, + "valid_targets_min": 633 + }, + { + "epoch": 2.5476190476190474, + "grad_norm": 0.4012855625129347, + "learning_rate": 3.210761235824639e-05, + "loss": 0.1955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1662881076335907, + "step": 1605, + "valid_targets_mean": 5683.6, + "valid_targets_min": 775 + }, + { + "epoch": 2.5555555555555554, + "grad_norm": 0.41653714911973017, + "learning_rate": 3.204451676756175e-05, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1690150797367096, + "step": 1610, + "valid_targets_mean": 5188.4, + "valid_targets_min": 2353 + }, + { + "epoch": 2.5634920634920633, + "grad_norm": 0.43105631877085077, + "learning_rate": 3.198123252272419e-05, + "loss": 0.1789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1893271803855896, + "step": 1615, + "valid_targets_mean": 5241.9, + "valid_targets_min": 2285 + }, + { + "epoch": 2.571428571428571, + "grad_norm": 0.45967281455616704, + "learning_rate": 3.1917760614959505e-05, + "loss": 0.1795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19677485525608063, + "step": 1620, + "valid_targets_mean": 5801.2, + "valid_targets_min": 1301 + }, + { + "epoch": 2.5793650793650795, + "grad_norm": 0.40150052297597855, + "learning_rate": 3.1854102038432856e-05, + "loss": 0.1803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1876111626625061, + "step": 1625, + "valid_targets_mean": 5630.9, + "valid_targets_min": 404 + }, + { + "epoch": 2.5873015873015874, + "grad_norm": 0.352002911984159, + "learning_rate": 3.17902577902332e-05, + "loss": 0.1737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1494748890399933, + "step": 1630, + "valid_targets_mean": 5767.0, + "valid_targets_min": 330 + }, + { + "epoch": 2.5952380952380953, + "grad_norm": 0.38659122601598817, + "learning_rate": 3.172622887035771e-05, + "loss": 0.168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17912599444389343, + "step": 1635, + "valid_targets_mean": 5444.4, + "valid_targets_min": 260 + }, + { + "epoch": 2.6031746031746033, + "grad_norm": 0.566374914273737, + "learning_rate": 3.1662016281696073e-05, + "loss": 0.1954, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1878930926322937, + "step": 1640, + "valid_targets_mean": 4545.0, + "valid_targets_min": 502 + }, + { + "epoch": 2.611111111111111, + "grad_norm": 0.40019696946056404, + "learning_rate": 3.15976210300148e-05, + "loss": 0.1817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19527481496334076, + "step": 1645, + "valid_targets_mean": 5715.0, + "valid_targets_min": 366 + }, + { + "epoch": 2.619047619047619, + "grad_norm": 0.39423172570166204, + "learning_rate": 3.153304412394143e-05, + "loss": 0.1652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1640002578496933, + "step": 1650, + "valid_targets_mean": 5794.7, + "valid_targets_min": 680 + }, + { + "epoch": 2.626984126984127, + "grad_norm": 0.4024698863549588, + "learning_rate": 3.146828657494883e-05, + "loss": 0.1933, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1920669972896576, + "step": 1655, + "valid_targets_mean": 5749.9, + "valid_targets_min": 3138 + }, + { + "epoch": 2.634920634920635, + "grad_norm": 0.3925326135892127, + "learning_rate": 3.140334939733924e-05, + "loss": 0.1733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18373993039131165, + "step": 1660, + "valid_targets_mean": 5227.9, + "valid_targets_min": 2482 + }, + { + "epoch": 2.642857142857143, + "grad_norm": 0.46866690749449025, + "learning_rate": 3.1338233608228455e-05, + "loss": 0.1701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17870697379112244, + "step": 1665, + "valid_targets_mean": 5330.0, + "valid_targets_min": 609 + }, + { + "epoch": 2.6507936507936507, + "grad_norm": 0.3948699322229872, + "learning_rate": 3.127294022752988e-05, + "loss": 0.1748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16245272755622864, + "step": 1670, + "valid_targets_mean": 5712.2, + "valid_targets_min": 768 + }, + { + "epoch": 2.6587301587301586, + "grad_norm": 0.33562761669358987, + "learning_rate": 3.120747027793854e-05, + "loss": 0.1578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1378447711467743, + "step": 1675, + "valid_targets_mean": 6322.2, + "valid_targets_min": 674 + }, + { + "epoch": 2.6666666666666665, + "grad_norm": 0.4080788254926686, + "learning_rate": 3.114182478491509e-05, + "loss": 0.1699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16858676075935364, + "step": 1680, + "valid_targets_mean": 5623.9, + "valid_targets_min": 325 + }, + { + "epoch": 2.674603174603175, + "grad_norm": 0.44788385062808955, + "learning_rate": 3.107600477666969e-05, + "loss": 0.1756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17567336559295654, + "step": 1685, + "valid_targets_mean": 5283.2, + "valid_targets_min": 2475 + }, + { + "epoch": 2.682539682539683, + "grad_norm": 0.3698134704035969, + "learning_rate": 3.1010011284146004e-05, + "loss": 0.1785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1585252285003662, + "step": 1690, + "valid_targets_mean": 5612.2, + "valid_targets_min": 1509 + }, + { + "epoch": 2.6904761904761907, + "grad_norm": 0.4744606363702609, + "learning_rate": 3.0943845341004944e-05, + "loss": 0.1881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21260786056518555, + "step": 1695, + "valid_targets_mean": 4632.7, + "valid_targets_min": 268 + }, + { + "epoch": 2.6984126984126986, + "grad_norm": 0.3925483959574624, + "learning_rate": 3.087750798360856e-05, + "loss": 0.1717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1703377664089203, + "step": 1700, + "valid_targets_mean": 6203.7, + "valid_targets_min": 1733 + }, + { + "epoch": 2.7063492063492065, + "grad_norm": 0.423603961482913, + "learning_rate": 3.0811000251003774e-05, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1864738166332245, + "step": 1705, + "valid_targets_mean": 4999.4, + "valid_targets_min": 633 + }, + { + "epoch": 2.7142857142857144, + "grad_norm": 0.42173447744798365, + "learning_rate": 3.074432318490608e-05, + "loss": 0.1819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17015457153320312, + "step": 1710, + "valid_targets_mean": 4929.1, + "valid_targets_min": 1230 + }, + { + "epoch": 2.7222222222222223, + "grad_norm": 0.3656999014872132, + "learning_rate": 3.067747782968328e-05, + "loss": 0.1735, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1662687063217163, + "step": 1715, + "valid_targets_mean": 5857.1, + "valid_targets_min": 1370 + }, + { + "epoch": 2.7301587301587302, + "grad_norm": 0.4250802423607952, + "learning_rate": 3.0610465232339096e-05, + "loss": 0.1802, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17572614550590515, + "step": 1720, + "valid_targets_mean": 5605.6, + "valid_targets_min": 895 + }, + { + "epoch": 2.738095238095238, + "grad_norm": 0.39832850275301074, + "learning_rate": 3.054328644249677e-05, + "loss": 0.174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17158354818820953, + "step": 1725, + "valid_targets_mean": 5492.9, + "valid_targets_min": 1324 + }, + { + "epoch": 2.746031746031746, + "grad_norm": 0.40611280034429587, + "learning_rate": 3.047594251238265e-05, + "loss": 0.1741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17876523733139038, + "step": 1730, + "valid_targets_mean": 5122.9, + "valid_targets_min": 1754 + }, + { + "epoch": 2.753968253968254, + "grad_norm": 0.3850213501848369, + "learning_rate": 3.0408434496809643e-05, + "loss": 0.1787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1557503491640091, + "step": 1735, + "valid_targets_mean": 4817.9, + "valid_targets_min": 346 + }, + { + "epoch": 2.761904761904762, + "grad_norm": 0.43072814030855905, + "learning_rate": 3.034076345316079e-05, + "loss": 0.1863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1973961591720581, + "step": 1740, + "valid_targets_mean": 5802.5, + "valid_targets_min": 1788 + }, + { + "epoch": 2.7698412698412698, + "grad_norm": 0.4244003572959457, + "learning_rate": 3.0272930441372628e-05, + "loss": 0.1722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1777854859828949, + "step": 1745, + "valid_targets_mean": 5021.1, + "valid_targets_min": 1233 + }, + { + "epoch": 2.7777777777777777, + "grad_norm": 0.3968223319635917, + "learning_rate": 3.02049365239186e-05, + "loss": 0.1778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18718859553337097, + "step": 1750, + "valid_targets_mean": 5884.2, + "valid_targets_min": 3484 + }, + { + "epoch": 2.7857142857142856, + "grad_norm": 0.38979762153827907, + "learning_rate": 3.0136782765792455e-05, + "loss": 0.1767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.184782013297081, + "step": 1755, + "valid_targets_mean": 5326.1, + "valid_targets_min": 3768 + }, + { + "epoch": 2.7936507936507935, + "grad_norm": 0.401430603353089, + "learning_rate": 3.0068470234491517e-05, + "loss": 0.1816, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18122202157974243, + "step": 1760, + "valid_targets_mean": 5606.8, + "valid_targets_min": 2889 + }, + { + "epoch": 2.8015873015873014, + "grad_norm": 0.46774432941464267, + "learning_rate": 3.0000000000000004e-05, + "loss": 0.1715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19038715958595276, + "step": 1765, + "valid_targets_mean": 5706.6, + "valid_targets_min": 953 + }, + { + "epoch": 2.8095238095238093, + "grad_norm": 0.3900657231476483, + "learning_rate": 2.993137313477223e-05, + "loss": 0.1849, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16720932722091675, + "step": 1770, + "valid_targets_mean": 5955.4, + "valid_targets_min": 229 + }, + { + "epoch": 2.817460317460317, + "grad_norm": 0.4671446489123995, + "learning_rate": 2.9862590713715837e-05, + "loss": 0.1831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1919449269771576, + "step": 1775, + "valid_targets_mean": 4992.8, + "valid_targets_min": 1710 + }, + { + "epoch": 2.825396825396825, + "grad_norm": 0.4108207960556999, + "learning_rate": 2.9793653814174957e-05, + "loss": 0.1748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16805459558963776, + "step": 1780, + "valid_targets_mean": 5892.2, + "valid_targets_min": 1220 + }, + { + "epoch": 2.8333333333333335, + "grad_norm": 0.3878996578053704, + "learning_rate": 2.9724563515913317e-05, + "loss": 0.1669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16483880579471588, + "step": 1785, + "valid_targets_mean": 5715.8, + "valid_targets_min": 816 + }, + { + "epoch": 2.8412698412698414, + "grad_norm": 0.3924118467590962, + "learning_rate": 2.9655320901097348e-05, + "loss": 0.1827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16196705400943756, + "step": 1790, + "valid_targets_mean": 5235.0, + "valid_targets_min": 2689 + }, + { + "epoch": 2.8492063492063493, + "grad_norm": 0.38981761916486113, + "learning_rate": 2.9585927054279224e-05, + "loss": 0.1767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1542998105287552, + "step": 1795, + "valid_targets_mean": 5381.4, + "valid_targets_min": 2263 + }, + { + "epoch": 2.857142857142857, + "grad_norm": 0.3581946508380772, + "learning_rate": 2.951638306237988e-05, + "loss": 0.1733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17149242758750916, + "step": 1800, + "valid_targets_mean": 6313.2, + "valid_targets_min": 1580 + }, + { + "epoch": 2.865079365079365, + "grad_norm": 0.4494970197989124, + "learning_rate": 2.9446690014671976e-05, + "loss": 0.1712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1602257341146469, + "step": 1805, + "valid_targets_mean": 5223.4, + "valid_targets_min": 935 + }, + { + "epoch": 2.873015873015873, + "grad_norm": 0.37530142354886875, + "learning_rate": 2.937684900276285e-05, + "loss": 0.17, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15792617201805115, + "step": 1810, + "valid_targets_mean": 5738.4, + "valid_targets_min": 2336 + }, + { + "epoch": 2.880952380952381, + "grad_norm": 0.39396887510337375, + "learning_rate": 2.9306861120577416e-05, + "loss": 0.1681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15244446694850922, + "step": 1815, + "valid_targets_mean": 5644.9, + "valid_targets_min": 1221 + }, + { + "epoch": 2.888888888888889, + "grad_norm": 0.3706798220884454, + "learning_rate": 2.923672746434103e-05, + "loss": 0.1754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1762312352657318, + "step": 1820, + "valid_targets_mean": 6201.0, + "valid_targets_min": 552 + }, + { + "epoch": 2.8968253968253967, + "grad_norm": 0.4021984022426286, + "learning_rate": 2.9166449132562303e-05, + "loss": 0.1915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2141510546207428, + "step": 1825, + "valid_targets_mean": 5325.4, + "valid_targets_min": 1539 + }, + { + "epoch": 2.9047619047619047, + "grad_norm": 0.42224038990358104, + "learning_rate": 2.9096027226015927e-05, + "loss": 0.1742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18177683651447296, + "step": 1830, + "valid_targets_mean": 4708.6, + "valid_targets_min": 1117 + }, + { + "epoch": 2.9126984126984126, + "grad_norm": 0.3819006419400972, + "learning_rate": 2.9025462847725405e-05, + "loss": 0.1767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16706177592277527, + "step": 1835, + "valid_targets_mean": 6122.9, + "valid_targets_min": 2503 + }, + { + "epoch": 2.9206349206349205, + "grad_norm": 0.4410064177919863, + "learning_rate": 2.8954757102945798e-05, + "loss": 0.1898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2258429080247879, + "step": 1840, + "valid_targets_mean": 4895.2, + "valid_targets_min": 833 + }, + { + "epoch": 2.928571428571429, + "grad_norm": 0.39487518735853117, + "learning_rate": 2.888391109914638e-05, + "loss": 0.18, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17937694489955902, + "step": 1845, + "valid_targets_mean": 5114.6, + "valid_targets_min": 643 + }, + { + "epoch": 2.9365079365079367, + "grad_norm": 0.38265019511969883, + "learning_rate": 2.8812925945993333e-05, + "loss": 0.1787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17933601140975952, + "step": 1850, + "valid_targets_mean": 5663.7, + "valid_targets_min": 1513 + }, + { + "epoch": 2.9444444444444446, + "grad_norm": 0.402651671534175, + "learning_rate": 2.8741802755332332e-05, + "loss": 0.1891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17298966646194458, + "step": 1855, + "valid_targets_mean": 4725.4, + "valid_targets_min": 403 + }, + { + "epoch": 2.9523809523809526, + "grad_norm": 0.43929484251032125, + "learning_rate": 2.8670542641171155e-05, + "loss": 0.1949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19994154572486877, + "step": 1860, + "valid_targets_mean": 4797.8, + "valid_targets_min": 657 + }, + { + "epoch": 2.9603174603174605, + "grad_norm": 0.40410989413843573, + "learning_rate": 2.859914671966221e-05, + "loss": 0.1717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17386558651924133, + "step": 1865, + "valid_targets_mean": 4977.1, + "valid_targets_min": 670 + }, + { + "epoch": 2.9682539682539684, + "grad_norm": 0.346301301173511, + "learning_rate": 2.8527616109085082e-05, + "loss": 0.1743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15783601999282837, + "step": 1870, + "valid_targets_mean": 5833.5, + "valid_targets_min": 1896 + }, + { + "epoch": 2.9761904761904763, + "grad_norm": 0.38049759940528577, + "learning_rate": 2.8455951929828977e-05, + "loss": 0.172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15591737627983093, + "step": 1875, + "valid_targets_mean": 5575.6, + "valid_targets_min": 295 + }, + { + "epoch": 2.984126984126984, + "grad_norm": 0.3831654484686578, + "learning_rate": 2.8384155304375223e-05, + "loss": 0.1779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19842402637004852, + "step": 1880, + "valid_targets_mean": 6079.8, + "valid_targets_min": 1937 + }, + { + "epoch": 2.992063492063492, + "grad_norm": 0.487008799638132, + "learning_rate": 2.8312227357279646e-05, + "loss": 0.1851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.192101389169693, + "step": 1885, + "valid_targets_mean": 4321.4, + "valid_targets_min": 342 + }, + { + "epoch": 3.0, + "grad_norm": 0.5184260476406786, + "learning_rate": 2.8240169215154977e-05, + "loss": 0.1692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1595619022846222, + "step": 1890, + "valid_targets_mean": 5334.9, + "valid_targets_min": 325 + }, + { + "epoch": 3.007936507936508, + "grad_norm": 0.3405314330974361, + "learning_rate": 2.8167982006653196e-05, + "loss": 0.1623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15521922707557678, + "step": 1895, + "valid_targets_mean": 6951.9, + "valid_targets_min": 3834 + }, + { + "epoch": 3.015873015873016, + "grad_norm": 0.44531826407382213, + "learning_rate": 2.8095666862447876e-05, + "loss": 0.161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18891948461532593, + "step": 1900, + "valid_targets_mean": 5601.6, + "valid_targets_min": 2266 + }, + { + "epoch": 3.0238095238095237, + "grad_norm": 0.3667355362457471, + "learning_rate": 2.8023224915216442e-05, + "loss": 0.1613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13565120100975037, + "step": 1905, + "valid_targets_mean": 6374.4, + "valid_targets_min": 1807 + }, + { + "epoch": 3.0317460317460316, + "grad_norm": 0.36488467668267915, + "learning_rate": 2.795065729962244e-05, + "loss": 0.1658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15088070929050446, + "step": 1910, + "valid_targets_mean": 6743.3, + "valid_targets_min": 3371 + }, + { + "epoch": 3.0396825396825395, + "grad_norm": 0.5240072304479968, + "learning_rate": 2.7877965152297785e-05, + "loss": 0.1678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16858817636966705, + "step": 1915, + "valid_targets_mean": 5218.3, + "valid_targets_min": 325 + }, + { + "epoch": 3.0476190476190474, + "grad_norm": 0.4345088557078499, + "learning_rate": 2.780514961182492e-05, + "loss": 0.1688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18977370858192444, + "step": 1920, + "valid_targets_mean": 6161.9, + "valid_targets_min": 978 + }, + { + "epoch": 3.0555555555555554, + "grad_norm": 0.43067510343421206, + "learning_rate": 2.773221181871903e-05, + "loss": 0.145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15231278538703918, + "step": 1925, + "valid_targets_mean": 5256.0, + "valid_targets_min": 1166 + }, + { + "epoch": 3.0634920634920633, + "grad_norm": 0.4329109482212343, + "learning_rate": 2.765915291541013e-05, + "loss": 0.1586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1648765504360199, + "step": 1930, + "valid_targets_mean": 5768.6, + "valid_targets_min": 2664 + }, + { + "epoch": 3.0714285714285716, + "grad_norm": 0.4360393225780438, + "learning_rate": 2.7585974046225206e-05, + "loss": 0.1578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16902469098567963, + "step": 1935, + "valid_targets_mean": 4864.1, + "valid_targets_min": 1763 + }, + { + "epoch": 3.0793650793650795, + "grad_norm": 0.42242563176181436, + "learning_rate": 2.751267635737027e-05, + "loss": 0.1589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15772101283073425, + "step": 1940, + "valid_targets_mean": 5512.9, + "valid_targets_min": 340 + }, + { + "epoch": 3.0873015873015874, + "grad_norm": 0.38359983981254425, + "learning_rate": 2.7439260996912423e-05, + "loss": 0.1646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.171489417552948, + "step": 1945, + "valid_targets_mean": 5938.1, + "valid_targets_min": 2752 + }, + { + "epoch": 3.0952380952380953, + "grad_norm": 0.41322753491472525, + "learning_rate": 2.7365729114761862e-05, + "loss": 0.1582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15758131444454193, + "step": 1950, + "valid_targets_mean": 5200.1, + "valid_targets_min": 370 + }, + { + "epoch": 3.1031746031746033, + "grad_norm": 0.4186192867390412, + "learning_rate": 2.729208186265386e-05, + "loss": 0.1546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14994092285633087, + "step": 1955, + "valid_targets_mean": 5166.9, + "valid_targets_min": 251 + }, + { + "epoch": 3.111111111111111, + "grad_norm": 0.3644255779428956, + "learning_rate": 2.721832039413077e-05, + "loss": 0.1513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14574278891086578, + "step": 1960, + "valid_targets_mean": 6365.3, + "valid_targets_min": 1939 + }, + { + "epoch": 3.119047619047619, + "grad_norm": 0.41549829471711147, + "learning_rate": 2.7144445864523887e-05, + "loss": 0.1578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15822090208530426, + "step": 1965, + "valid_targets_mean": 5193.6, + "valid_targets_min": 300 + }, + { + "epoch": 3.126984126984127, + "grad_norm": 0.4080754517778744, + "learning_rate": 2.7070459430935407e-05, + "loss": 0.1743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18438664078712463, + "step": 1970, + "valid_targets_mean": 6076.5, + "valid_targets_min": 366 + }, + { + "epoch": 3.134920634920635, + "grad_norm": 0.34845655756539395, + "learning_rate": 2.69963622522203e-05, + "loss": 0.1563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1505107879638672, + "step": 1975, + "valid_targets_mean": 7000.2, + "valid_targets_min": 867 + }, + { + "epoch": 3.142857142857143, + "grad_norm": 0.3957443526395654, + "learning_rate": 2.6922155488968117e-05, + "loss": 0.1558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15863436460494995, + "step": 1980, + "valid_targets_mean": 6114.2, + "valid_targets_min": 1754 + }, + { + "epoch": 3.1507936507936507, + "grad_norm": 0.3987157816987954, + "learning_rate": 2.684784030348486e-05, + "loss": 0.1541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1591426432132721, + "step": 1985, + "valid_targets_mean": 5729.1, + "valid_targets_min": 2795 + }, + { + "epoch": 3.1587301587301586, + "grad_norm": 0.3311471215567983, + "learning_rate": 2.6773417859774755e-05, + "loss": 0.1575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13412167131900787, + "step": 1990, + "valid_targets_mean": 7218.6, + "valid_targets_min": 5066 + }, + { + "epoch": 3.1666666666666665, + "grad_norm": 0.44452141015453234, + "learning_rate": 2.669888932352201e-05, + "loss": 0.1525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14640459418296814, + "step": 1995, + "valid_targets_mean": 4116.0, + "valid_targets_min": 582 + }, + { + "epoch": 3.1746031746031744, + "grad_norm": 0.4296245823041024, + "learning_rate": 2.662425586207259e-05, + "loss": 0.1587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18060961365699768, + "step": 2000, + "valid_targets_mean": 5717.8, + "valid_targets_min": 2684 + }, + { + "epoch": 3.1825396825396823, + "grad_norm": 0.44161933967338207, + "learning_rate": 2.6549518644415876e-05, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13894905149936676, + "step": 2005, + "valid_targets_mean": 5385.4, + "valid_targets_min": 1651 + }, + { + "epoch": 3.1904761904761907, + "grad_norm": 0.4402027862054312, + "learning_rate": 2.6474678841166426e-05, + "loss": 0.1658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15985198318958282, + "step": 2010, + "valid_targets_mean": 4908.7, + "valid_targets_min": 263 + }, + { + "epoch": 3.1984126984126986, + "grad_norm": 0.4521093840332604, + "learning_rate": 2.639973762454558e-05, + "loss": 0.1565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17308469116687775, + "step": 2015, + "valid_targets_mean": 5940.6, + "valid_targets_min": 2224 + }, + { + "epoch": 3.2063492063492065, + "grad_norm": 0.44949779778305143, + "learning_rate": 2.6324696168363134e-05, + "loss": 0.1588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16031065583229065, + "step": 2020, + "valid_targets_mean": 4588.2, + "valid_targets_min": 1570 + }, + { + "epoch": 3.2142857142857144, + "grad_norm": 0.44736879299802484, + "learning_rate": 2.624955564799894e-05, + "loss": 0.1687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18750852346420288, + "step": 2025, + "valid_targets_mean": 4769.4, + "valid_targets_min": 558 + }, + { + "epoch": 3.2222222222222223, + "grad_norm": 0.4063469374069734, + "learning_rate": 2.617431724038451e-05, + "loss": 0.1592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15944424271583557, + "step": 2030, + "valid_targets_mean": 5581.2, + "valid_targets_min": 1012 + }, + { + "epoch": 3.2301587301587302, + "grad_norm": 0.4237562908407849, + "learning_rate": 2.609898212398455e-05, + "loss": 0.1532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15621069073677063, + "step": 2035, + "valid_targets_mean": 5366.9, + "valid_targets_min": 1168 + }, + { + "epoch": 3.238095238095238, + "grad_norm": 0.39370444783531333, + "learning_rate": 2.6023551478778535e-05, + "loss": 0.1508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14452454447746277, + "step": 2040, + "valid_targets_mean": 5937.5, + "valid_targets_min": 225 + }, + { + "epoch": 3.246031746031746, + "grad_norm": 0.49270720514980915, + "learning_rate": 2.5948026486242225e-05, + "loss": 0.1607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1724134087562561, + "step": 2045, + "valid_targets_mean": 3770.9, + "valid_targets_min": 767 + }, + { + "epoch": 3.253968253968254, + "grad_norm": 0.4577588443456724, + "learning_rate": 2.5872408329329136e-05, + "loss": 0.1606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13135287165641785, + "step": 2050, + "valid_targets_mean": 5848.4, + "valid_targets_min": 2027 + }, + { + "epoch": 3.261904761904762, + "grad_norm": 0.44271121122885954, + "learning_rate": 2.5796698192452016e-05, + "loss": 0.16, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16199621558189392, + "step": 2055, + "valid_targets_mean": 4766.6, + "valid_targets_min": 404 + }, + { + "epoch": 3.2698412698412698, + "grad_norm": 0.39031087916457097, + "learning_rate": 2.572089726146432e-05, + "loss": 0.1632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16037863492965698, + "step": 2060, + "valid_targets_mean": 5805.1, + "valid_targets_min": 728 + }, + { + "epoch": 3.2777777777777777, + "grad_norm": 0.4646573659873822, + "learning_rate": 2.564500672364162e-05, + "loss": 0.1647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.162231907248497, + "step": 2065, + "valid_targets_mean": 4860.0, + "valid_targets_min": 260 + }, + { + "epoch": 3.2857142857142856, + "grad_norm": 0.3990327001371696, + "learning_rate": 2.556902776766298e-05, + "loss": 0.1534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15069621801376343, + "step": 2070, + "valid_targets_mean": 5909.9, + "valid_targets_min": 1698 + }, + { + "epoch": 3.2936507936507935, + "grad_norm": 0.43271648198961715, + "learning_rate": 2.5492961583592397e-05, + "loss": 0.1601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.170293927192688, + "step": 2075, + "valid_targets_mean": 5133.9, + "valid_targets_min": 1647 + }, + { + "epoch": 3.3015873015873014, + "grad_norm": 0.44977807483400484, + "learning_rate": 2.5416809362860107e-05, + "loss": 0.1738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18963780999183655, + "step": 2080, + "valid_targets_mean": 4750.8, + "valid_targets_min": 559 + }, + { + "epoch": 3.3095238095238093, + "grad_norm": 0.3681715462106892, + "learning_rate": 2.5340572298243946e-05, + "loss": 0.1484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1364738941192627, + "step": 2085, + "valid_targets_mean": 6450.1, + "valid_targets_min": 4105 + }, + { + "epoch": 3.317460317460317, + "grad_norm": 0.45380051213619826, + "learning_rate": 2.5264251583850677e-05, + "loss": 0.1728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1766563057899475, + "step": 2090, + "valid_targets_mean": 4932.4, + "valid_targets_min": 342 + }, + { + "epoch": 3.3253968253968256, + "grad_norm": 0.34929908788411107, + "learning_rate": 2.518784841509726e-05, + "loss": 0.1651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.142880380153656, + "step": 2095, + "valid_targets_mean": 7236.7, + "valid_targets_min": 3519 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 0.42494242702127294, + "learning_rate": 2.511136398869216e-05, + "loss": 0.1534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1457439363002777, + "step": 2100, + "valid_targets_mean": 5273.4, + "valid_targets_min": 2522 + }, + { + "epoch": 3.3412698412698414, + "grad_norm": 0.41871282992961945, + "learning_rate": 2.503479950261658e-05, + "loss": 0.1564, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14400547742843628, + "step": 2105, + "valid_targets_mean": 5479.6, + "valid_targets_min": 1124 + }, + { + "epoch": 3.3492063492063493, + "grad_norm": 0.41199941257756795, + "learning_rate": 2.4958156156105693e-05, + "loss": 0.1589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16985690593719482, + "step": 2110, + "valid_targets_mean": 5431.3, + "valid_targets_min": 322 + }, + { + "epoch": 3.357142857142857, + "grad_norm": 0.41511677551741843, + "learning_rate": 2.4881435149629892e-05, + "loss": 0.1611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15738296508789062, + "step": 2115, + "valid_targets_mean": 5947.6, + "valid_targets_min": 1723 + }, + { + "epoch": 3.365079365079365, + "grad_norm": 0.3658736974288424, + "learning_rate": 2.4804637684875937e-05, + "loss": 0.1643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1301427185535431, + "step": 2120, + "valid_targets_mean": 7314.8, + "valid_targets_min": 534 + }, + { + "epoch": 3.373015873015873, + "grad_norm": 0.4048532254523095, + "learning_rate": 2.4727764964728177e-05, + "loss": 0.1538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15161439776420593, + "step": 2125, + "valid_targets_mean": 5149.5, + "valid_targets_min": 2014 + }, + { + "epoch": 3.380952380952381, + "grad_norm": 0.3829760082289399, + "learning_rate": 2.4650818193249693e-05, + "loss": 0.1596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14068162441253662, + "step": 2130, + "valid_targets_mean": 5836.1, + "valid_targets_min": 1685 + }, + { + "epoch": 3.388888888888889, + "grad_norm": 0.4507430940791641, + "learning_rate": 2.4573798575663425e-05, + "loss": 0.1671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16859221458435059, + "step": 2135, + "valid_targets_mean": 5247.2, + "valid_targets_min": 311 + }, + { + "epoch": 3.3968253968253967, + "grad_norm": 0.47784011418612515, + "learning_rate": 2.4496707318333323e-05, + "loss": 0.1718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.176387220621109, + "step": 2140, + "valid_targets_mean": 4379.1, + "valid_targets_min": 1066 + }, + { + "epoch": 3.4047619047619047, + "grad_norm": 0.41751426715134293, + "learning_rate": 2.441954562874541e-05, + "loss": 0.1494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1648518145084381, + "step": 2145, + "valid_targets_mean": 5245.0, + "valid_targets_min": 1607 + }, + { + "epoch": 3.4126984126984126, + "grad_norm": 0.34417394477532504, + "learning_rate": 2.434231471548893e-05, + "loss": 0.1565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16924996674060822, + "step": 2150, + "valid_targets_mean": 7679.7, + "valid_targets_min": 4644 + }, + { + "epoch": 3.4206349206349205, + "grad_norm": 0.4099313742801725, + "learning_rate": 2.4265015788237348e-05, + "loss": 0.1638, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1553308665752411, + "step": 2155, + "valid_targets_mean": 5291.8, + "valid_targets_min": 1013 + }, + { + "epoch": 3.4285714285714284, + "grad_norm": 0.3644179150651529, + "learning_rate": 2.4187650057729465e-05, + "loss": 0.1619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13109582662582397, + "step": 2160, + "valid_targets_mean": 6145.5, + "valid_targets_min": 395 + }, + { + "epoch": 3.4365079365079367, + "grad_norm": 0.4344903235604141, + "learning_rate": 2.4110218735750403e-05, + "loss": 0.1547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16567984223365784, + "step": 2165, + "valid_targets_mean": 5219.4, + "valid_targets_min": 272 + }, + { + "epoch": 3.4444444444444446, + "grad_norm": 0.44162025985134756, + "learning_rate": 2.4032723035112667e-05, + "loss": 0.1617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17561107873916626, + "step": 2170, + "valid_targets_mean": 4544.6, + "valid_targets_min": 776 + }, + { + "epoch": 3.4523809523809526, + "grad_norm": 0.3932602300074192, + "learning_rate": 2.3955164169637124e-05, + "loss": 0.1625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15324220061302185, + "step": 2175, + "valid_targets_mean": 5706.5, + "valid_targets_min": 347 + }, + { + "epoch": 3.4603174603174605, + "grad_norm": 0.42827386189990235, + "learning_rate": 2.387754335413398e-05, + "loss": 0.1606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15386894345283508, + "step": 2180, + "valid_targets_mean": 4324.1, + "valid_targets_min": 373 + }, + { + "epoch": 3.4682539682539684, + "grad_norm": 0.41357075683918504, + "learning_rate": 2.3799861804383807e-05, + "loss": 0.1632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15950973331928253, + "step": 2185, + "valid_targets_mean": 5571.4, + "valid_targets_min": 1700 + }, + { + "epoch": 3.4761904761904763, + "grad_norm": 0.4041812547912995, + "learning_rate": 2.3722120737118414e-05, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1401449292898178, + "step": 2190, + "valid_targets_mean": 5355.0, + "valid_targets_min": 657 + }, + { + "epoch": 3.484126984126984, + "grad_norm": 0.4349818563348464, + "learning_rate": 2.3644321370001868e-05, + "loss": 0.1677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16082270443439484, + "step": 2195, + "valid_targets_mean": 4887.8, + "valid_targets_min": 201 + }, + { + "epoch": 3.492063492063492, + "grad_norm": 0.41308220488892183, + "learning_rate": 2.3566464921611393e-05, + "loss": 0.1629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16265086829662323, + "step": 2200, + "valid_targets_mean": 5437.2, + "valid_targets_min": 625 + }, + { + "epoch": 3.5, + "grad_norm": 0.4212037346610392, + "learning_rate": 2.348855261141827e-05, + "loss": 0.1636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16348662972450256, + "step": 2205, + "valid_targets_mean": 4889.4, + "valid_targets_min": 783 + }, + { + "epoch": 3.507936507936508, + "grad_norm": 0.4704570906062029, + "learning_rate": 2.341058565976874e-05, + "loss": 0.1644, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20820948481559753, + "step": 2210, + "valid_targets_mean": 5980.6, + "valid_targets_min": 961 + }, + { + "epoch": 3.515873015873016, + "grad_norm": 0.5036481968595117, + "learning_rate": 2.3332565287864918e-05, + "loss": 0.1724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15473511815071106, + "step": 2215, + "valid_targets_mean": 5321.4, + "valid_targets_min": 1443 + }, + { + "epoch": 3.5238095238095237, + "grad_norm": 0.3735233109636718, + "learning_rate": 2.325449271774563e-05, + "loss": 0.1555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14910230040550232, + "step": 2220, + "valid_targets_mean": 6132.2, + "valid_targets_min": 1324 + }, + { + "epoch": 3.5317460317460316, + "grad_norm": 0.444413935820999, + "learning_rate": 2.3176369172267286e-05, + "loss": 0.1586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17016586661338806, + "step": 2225, + "valid_targets_mean": 4620.8, + "valid_targets_min": 1621 + }, + { + "epoch": 3.5396825396825395, + "grad_norm": 0.3953957921849172, + "learning_rate": 2.3098195875084732e-05, + "loss": 0.1598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15769408643245697, + "step": 2230, + "valid_targets_mean": 5690.9, + "valid_targets_min": 764 + }, + { + "epoch": 3.5476190476190474, + "grad_norm": 0.5077199865444484, + "learning_rate": 2.301997405063208e-05, + "loss": 0.1739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15993595123291016, + "step": 2235, + "valid_targets_mean": 5084.5, + "valid_targets_min": 281 + }, + { + "epoch": 3.5555555555555554, + "grad_norm": 0.4086230414672359, + "learning_rate": 2.2941704924103535e-05, + "loss": 0.1633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1494399756193161, + "step": 2240, + "valid_targets_mean": 5467.4, + "valid_targets_min": 327 + }, + { + "epoch": 3.5634920634920633, + "grad_norm": 0.42370220867636965, + "learning_rate": 2.2863389721434165e-05, + "loss": 0.167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16436323523521423, + "step": 2245, + "valid_targets_mean": 6613.1, + "valid_targets_min": 301 + }, + { + "epoch": 3.571428571428571, + "grad_norm": 0.47414264477785967, + "learning_rate": 2.2785029669280775e-05, + "loss": 0.1463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16836649179458618, + "step": 2250, + "valid_targets_mean": 4143.4, + "valid_targets_min": 1765 + }, + { + "epoch": 3.5793650793650795, + "grad_norm": 0.4279968424111969, + "learning_rate": 2.2706625995002626e-05, + "loss": 0.1602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15963539481163025, + "step": 2255, + "valid_targets_mean": 5683.1, + "valid_targets_min": 677 + }, + { + "epoch": 3.5873015873015874, + "grad_norm": 0.43551657194367943, + "learning_rate": 2.262817992664224e-05, + "loss": 0.1558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15465790033340454, + "step": 2260, + "valid_targets_mean": 5270.2, + "valid_targets_min": 792 + }, + { + "epoch": 3.5952380952380953, + "grad_norm": 0.473618019837446, + "learning_rate": 2.2549692692906158e-05, + "loss": 0.1664, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19456999003887177, + "step": 2265, + "valid_targets_mean": 4033.5, + "valid_targets_min": 1118 + }, + { + "epoch": 3.6031746031746033, + "grad_norm": 0.4610864812937639, + "learning_rate": 2.24711655231457e-05, + "loss": 0.159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16838069260120392, + "step": 2270, + "valid_targets_mean": 4850.8, + "valid_targets_min": 893 + }, + { + "epoch": 3.611111111111111, + "grad_norm": 0.4183921042676627, + "learning_rate": 2.2392599647337724e-05, + "loss": 0.1696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15832781791687012, + "step": 2275, + "valid_targets_mean": 5614.7, + "valid_targets_min": 2089 + }, + { + "epoch": 3.619047619047619, + "grad_norm": 0.4746926085970967, + "learning_rate": 2.23139962960653e-05, + "loss": 0.1667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17942070960998535, + "step": 2280, + "valid_targets_mean": 4651.3, + "valid_targets_min": 217 + }, + { + "epoch": 3.626984126984127, + "grad_norm": 0.42504033178119727, + "learning_rate": 2.2235356700498528e-05, + "loss": 0.1622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1609695851802826, + "step": 2285, + "valid_targets_mean": 5650.9, + "valid_targets_min": 693 + }, + { + "epoch": 3.634920634920635, + "grad_norm": 0.35104401078143266, + "learning_rate": 2.2156682092375175e-05, + "loss": 0.1593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14197728037834167, + "step": 2290, + "valid_targets_mean": 6449.4, + "valid_targets_min": 2699 + }, + { + "epoch": 3.642857142857143, + "grad_norm": 0.48629893819073916, + "learning_rate": 2.2077973703981423e-05, + "loss": 0.1769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.171891987323761, + "step": 2295, + "valid_targets_mean": 5000.9, + "valid_targets_min": 325 + }, + { + "epoch": 3.6507936507936507, + "grad_norm": 0.38982495934645944, + "learning_rate": 2.1999232768132552e-05, + "loss": 0.1622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15166640281677246, + "step": 2300, + "valid_targets_mean": 5598.8, + "valid_targets_min": 899 + }, + { + "epoch": 3.6587301587301586, + "grad_norm": 0.4538805178933535, + "learning_rate": 2.1920460518153637e-05, + "loss": 0.1667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13476833701133728, + "step": 2305, + "valid_targets_mean": 4883.2, + "valid_targets_min": 655 + }, + { + "epoch": 3.6666666666666665, + "grad_norm": 0.4650060205808812, + "learning_rate": 2.1841658187860232e-05, + "loss": 0.1706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1431887298822403, + "step": 2310, + "valid_targets_mean": 5863.8, + "valid_targets_min": 2487 + }, + { + "epoch": 3.674603174603175, + "grad_norm": 0.40827307741804697, + "learning_rate": 2.176282701153904e-05, + "loss": 0.1567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16187241673469543, + "step": 2315, + "valid_targets_mean": 5186.4, + "valid_targets_min": 881 + }, + { + "epoch": 3.682539682539683, + "grad_norm": 0.40111783297909087, + "learning_rate": 2.1683968223928572e-05, + "loss": 0.1618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1610245704650879, + "step": 2320, + "valid_targets_mean": 5193.5, + "valid_targets_min": 2007 + }, + { + "epoch": 3.6904761904761907, + "grad_norm": 0.4108636912566655, + "learning_rate": 2.1605083060199835e-05, + "loss": 0.1724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19376526772975922, + "step": 2325, + "valid_targets_mean": 5273.6, + "valid_targets_min": 918 + }, + { + "epoch": 3.6984126984126986, + "grad_norm": 0.4090720045772208, + "learning_rate": 2.152617275593694e-05, + "loss": 0.1581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1690284013748169, + "step": 2330, + "valid_targets_mean": 5263.3, + "valid_targets_min": 1166 + }, + { + "epoch": 3.7063492063492065, + "grad_norm": 0.37473461724981033, + "learning_rate": 2.144723854711781e-05, + "loss": 0.1513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14938317239284515, + "step": 2335, + "valid_targets_mean": 5389.9, + "valid_targets_min": 574 + }, + { + "epoch": 3.7142857142857144, + "grad_norm": 0.4843387089458587, + "learning_rate": 2.1368281670094766e-05, + "loss": 0.166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17382203042507172, + "step": 2340, + "valid_targets_mean": 4600.9, + "valid_targets_min": 1981 + }, + { + "epoch": 3.7222222222222223, + "grad_norm": 0.4119585290716737, + "learning_rate": 2.1289303361575175e-05, + "loss": 0.164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15374284982681274, + "step": 2345, + "valid_targets_mean": 5633.0, + "valid_targets_min": 2046 + }, + { + "epoch": 3.7301587301587302, + "grad_norm": 0.3952648176140555, + "learning_rate": 2.121030485860211e-05, + "loss": 0.1712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17831559479236603, + "step": 2350, + "valid_targets_mean": 5893.4, + "valid_targets_min": 2787 + }, + { + "epoch": 3.738095238095238, + "grad_norm": 0.41411933208876145, + "learning_rate": 2.113128739853493e-05, + "loss": 0.1678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17142191529273987, + "step": 2355, + "valid_targets_mean": 5256.2, + "valid_targets_min": 542 + }, + { + "epoch": 3.746031746031746, + "grad_norm": 0.45575184540137365, + "learning_rate": 2.1052252219029944e-05, + "loss": 0.1642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1489437222480774, + "step": 2360, + "valid_targets_mean": 5599.4, + "valid_targets_min": 360 + }, + { + "epoch": 3.753968253968254, + "grad_norm": 0.38228242544809465, + "learning_rate": 2.0973200558020967e-05, + "loss": 0.1561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1508997082710266, + "step": 2365, + "valid_targets_mean": 5869.1, + "valid_targets_min": 3409 + }, + { + "epoch": 3.761904761904762, + "grad_norm": 0.4148754628490215, + "learning_rate": 2.0894133653700005e-05, + "loss": 0.1597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1601417064666748, + "step": 2370, + "valid_targets_mean": 5959.2, + "valid_targets_min": 778 + }, + { + "epoch": 3.7698412698412698, + "grad_norm": 0.39421365324647456, + "learning_rate": 2.0815052744497795e-05, + "loss": 0.1647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16802407801151276, + "step": 2375, + "valid_targets_mean": 5507.4, + "valid_targets_min": 539 + }, + { + "epoch": 3.7777777777777777, + "grad_norm": 0.4179727695736473, + "learning_rate": 2.0735959069064434e-05, + "loss": 0.1578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1505141705274582, + "step": 2380, + "valid_targets_mean": 4560.2, + "valid_targets_min": 946 + }, + { + "epoch": 3.7857142857142856, + "grad_norm": 0.4025223262448166, + "learning_rate": 2.065685386624999e-05, + "loss": 0.1675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16200333833694458, + "step": 2385, + "valid_targets_mean": 5546.6, + "valid_targets_min": 2114 + }, + { + "epoch": 3.7936507936507935, + "grad_norm": 0.42971080125523514, + "learning_rate": 2.0577738375085076e-05, + "loss": 0.1539, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13847434520721436, + "step": 2390, + "valid_targets_mean": 4286.1, + "valid_targets_min": 670 + }, + { + "epoch": 3.8015873015873014, + "grad_norm": 0.4040671130609569, + "learning_rate": 2.0498613834761462e-05, + "loss": 0.1709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16613656282424927, + "step": 2395, + "valid_targets_mean": 5606.1, + "valid_targets_min": 1122 + }, + { + "epoch": 3.8095238095238093, + "grad_norm": 0.4104422551171815, + "learning_rate": 2.041948148461264e-05, + "loss": 0.1607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16815027594566345, + "step": 2400, + "valid_targets_mean": 6071.4, + "valid_targets_min": 1424 + }, + { + "epoch": 3.817460317460317, + "grad_norm": 0.433862516805992, + "learning_rate": 2.0340342564094436e-05, + "loss": 0.1684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1456870138645172, + "step": 2405, + "valid_targets_mean": 4660.6, + "valid_targets_min": 300 + }, + { + "epoch": 3.825396825396825, + "grad_norm": 0.37026873619906453, + "learning_rate": 2.0261198312765597e-05, + "loss": 0.1569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17130400240421295, + "step": 2410, + "valid_targets_mean": 7170.6, + "valid_targets_min": 3870 + }, + { + "epoch": 3.8333333333333335, + "grad_norm": 0.37774101351813544, + "learning_rate": 2.0182049970268355e-05, + "loss": 0.1601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1540232002735138, + "step": 2415, + "valid_targets_mean": 5801.1, + "valid_targets_min": 2004 + }, + { + "epoch": 3.8412698412698414, + "grad_norm": 0.40806762468519775, + "learning_rate": 2.010289877630902e-05, + "loss": 0.164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1633833944797516, + "step": 2420, + "valid_targets_mean": 6180.1, + "valid_targets_min": 278 + }, + { + "epoch": 3.8492063492063493, + "grad_norm": 0.4658635857688465, + "learning_rate": 2.002374597063858e-05, + "loss": 0.1673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14994177222251892, + "step": 2425, + "valid_targets_mean": 5532.7, + "valid_targets_min": 741 + }, + { + "epoch": 3.857142857142857, + "grad_norm": 0.48235346332409923, + "learning_rate": 1.9944592793033255e-05, + "loss": 0.1689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1955932080745697, + "step": 2430, + "valid_targets_mean": 4724.8, + "valid_targets_min": 766 + }, + { + "epoch": 3.865079365079365, + "grad_norm": 0.8801286649564228, + "learning_rate": 1.9865440483275086e-05, + "loss": 0.1609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1733412742614746, + "step": 2435, + "valid_targets_mean": 5920.0, + "valid_targets_min": 2207 + }, + { + "epoch": 3.873015873015873, + "grad_norm": 0.4518531270411737, + "learning_rate": 1.978629028113254e-05, + "loss": 0.157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15560846030712128, + "step": 2440, + "valid_targets_mean": 4698.5, + "valid_targets_min": 369 + }, + { + "epoch": 3.880952380952381, + "grad_norm": 0.4095907564342567, + "learning_rate": 1.9707143426341058e-05, + "loss": 0.1596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14006567001342773, + "step": 2445, + "valid_targets_mean": 5711.9, + "valid_targets_min": 1934 + }, + { + "epoch": 3.888888888888889, + "grad_norm": 0.38371188352350993, + "learning_rate": 1.962800115858364e-05, + "loss": 0.1649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15609079599380493, + "step": 2450, + "valid_targets_mean": 6032.2, + "valid_targets_min": 1733 + }, + { + "epoch": 3.8968253968253967, + "grad_norm": 0.4118539842920471, + "learning_rate": 1.9548864717471472e-05, + "loss": 0.1641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14997440576553345, + "step": 2455, + "valid_targets_mean": 4638.2, + "valid_targets_min": 2217 + }, + { + "epoch": 3.9047619047619047, + "grad_norm": 0.4025039557270683, + "learning_rate": 1.9469735342524454e-05, + "loss": 0.1688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1860060691833496, + "step": 2460, + "valid_targets_mean": 5977.4, + "valid_targets_min": 1525 + }, + { + "epoch": 3.9126984126984126, + "grad_norm": 0.4033220910310328, + "learning_rate": 1.939061427315179e-05, + "loss": 0.1656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1573881059885025, + "step": 2465, + "valid_targets_mean": 5562.9, + "valid_targets_min": 466 + }, + { + "epoch": 3.9206349206349205, + "grad_norm": 0.40909002728066796, + "learning_rate": 1.931150274863265e-05, + "loss": 0.171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1574082374572754, + "step": 2470, + "valid_targets_mean": 5320.6, + "valid_targets_min": 397 + }, + { + "epoch": 3.928571428571429, + "grad_norm": 0.42869522285562844, + "learning_rate": 1.9232402008096643e-05, + "loss": 0.175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19264186918735504, + "step": 2475, + "valid_targets_mean": 5145.9, + "valid_targets_min": 764 + }, + { + "epoch": 3.9365079365079367, + "grad_norm": 0.40381020748858665, + "learning_rate": 1.9153313290504495e-05, + "loss": 0.1588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13736480474472046, + "step": 2480, + "valid_targets_mean": 5991.2, + "valid_targets_min": 1935 + }, + { + "epoch": 3.9444444444444446, + "grad_norm": 0.44514859085692043, + "learning_rate": 1.9074237834628623e-05, + "loss": 0.1727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14704057574272156, + "step": 2485, + "valid_targets_mean": 5541.5, + "valid_targets_min": 2647 + }, + { + "epoch": 3.9523809523809526, + "grad_norm": 0.4265501479636169, + "learning_rate": 1.8995176879033698e-05, + "loss": 0.1615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18036606907844543, + "step": 2490, + "valid_targets_mean": 5466.8, + "valid_targets_min": 906 + }, + { + "epoch": 3.9603174603174605, + "grad_norm": 0.4487206178934192, + "learning_rate": 1.89161316620573e-05, + "loss": 0.1699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17454467713832855, + "step": 2495, + "valid_targets_mean": 5006.6, + "valid_targets_min": 287 + }, + { + "epoch": 3.9682539682539684, + "grad_norm": 0.4033222789710387, + "learning_rate": 1.8837103421790486e-05, + "loss": 0.1582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15375812351703644, + "step": 2500, + "valid_targets_mean": 6142.2, + "valid_targets_min": 1358 + }, + { + "epoch": 3.9761904761904763, + "grad_norm": 0.46326929396533706, + "learning_rate": 1.8758093396058386e-05, + "loss": 0.1687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17514026165008545, + "step": 2505, + "valid_targets_mean": 5165.4, + "valid_targets_min": 2275 + }, + { + "epoch": 3.984126984126984, + "grad_norm": 0.40301115402832177, + "learning_rate": 1.8679102822400874e-05, + "loss": 0.1679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15668317675590515, + "step": 2510, + "valid_targets_mean": 5199.4, + "valid_targets_min": 211 + }, + { + "epoch": 3.992063492063492, + "grad_norm": 0.4789393713046297, + "learning_rate": 1.8600132938053098e-05, + "loss": 0.1748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19280311465263367, + "step": 2515, + "valid_targets_mean": 4916.7, + "valid_targets_min": 1007 + }, + { + "epoch": 4.0, + "grad_norm": 0.46667355097734303, + "learning_rate": 1.8521184979926177e-05, + "loss": 0.1629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1989072561264038, + "step": 2520, + "valid_targets_mean": 5119.9, + "valid_targets_min": 207 + }, + { + "epoch": 4.007936507936508, + "grad_norm": 0.3847916291760838, + "learning_rate": 1.8442260184587804e-05, + "loss": 0.1508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16161227226257324, + "step": 2525, + "valid_targets_mean": 6026.6, + "valid_targets_min": 2419 + }, + { + "epoch": 4.015873015873016, + "grad_norm": 0.4090621169152007, + "learning_rate": 1.8363359788242842e-05, + "loss": 0.1388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14368446171283722, + "step": 2530, + "valid_targets_mean": 5862.6, + "valid_targets_min": 782 + }, + { + "epoch": 4.023809523809524, + "grad_norm": 0.6141566166940376, + "learning_rate": 1.8284485026714013e-05, + "loss": 0.1447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13589806854724884, + "step": 2535, + "valid_targets_mean": 6541.5, + "valid_targets_min": 1485 + }, + { + "epoch": 4.031746031746032, + "grad_norm": 0.4391239898708553, + "learning_rate": 1.8205637135422525e-05, + "loss": 0.1474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15070083737373352, + "step": 2540, + "valid_targets_mean": 5536.8, + "valid_targets_min": 938 + }, + { + "epoch": 4.0396825396825395, + "grad_norm": 0.41333666376927736, + "learning_rate": 1.8126817349368697e-05, + "loss": 0.1569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1329149305820465, + "step": 2545, + "valid_targets_mean": 5577.3, + "valid_targets_min": 1320 + }, + { + "epoch": 4.0476190476190474, + "grad_norm": 0.42756859856165286, + "learning_rate": 1.8048026903112632e-05, + "loss": 0.1485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1718430519104004, + "step": 2550, + "valid_targets_mean": 5278.7, + "valid_targets_min": 1393 + }, + { + "epoch": 4.055555555555555, + "grad_norm": 0.7929559502540676, + "learning_rate": 1.7969267030754903e-05, + "loss": 0.1524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1519625186920166, + "step": 2555, + "valid_targets_mean": 4177.6, + "valid_targets_min": 290 + }, + { + "epoch": 4.063492063492063, + "grad_norm": 0.43339872921712147, + "learning_rate": 1.7890538965917184e-05, + "loss": 0.1359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12034216523170471, + "step": 2560, + "valid_targets_mean": 5259.5, + "valid_targets_min": 600 + }, + { + "epoch": 4.071428571428571, + "grad_norm": 0.4431280646696749, + "learning_rate": 1.7811843941722952e-05, + "loss": 0.1485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16165150701999664, + "step": 2565, + "valid_targets_mean": 5233.3, + "valid_targets_min": 343 + }, + { + "epoch": 4.079365079365079, + "grad_norm": 0.4733813623577339, + "learning_rate": 1.7733183190778174e-05, + "loss": 0.1478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17020142078399658, + "step": 2570, + "valid_targets_mean": 4545.4, + "valid_targets_min": 852 + }, + { + "epoch": 4.087301587301587, + "grad_norm": 0.4256609723968049, + "learning_rate": 1.7654557945151968e-05, + "loss": 0.1541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16018405556678772, + "step": 2575, + "valid_targets_mean": 5520.1, + "valid_targets_min": 2986 + }, + { + "epoch": 4.095238095238095, + "grad_norm": 0.41262793853257446, + "learning_rate": 1.7575969436357352e-05, + "loss": 0.1561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13311775028705597, + "step": 2580, + "valid_targets_mean": 5285.7, + "valid_targets_min": 1788 + }, + { + "epoch": 4.103174603174603, + "grad_norm": 0.45945223536796, + "learning_rate": 1.7497418895331934e-05, + "loss": 0.1438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14666880667209625, + "step": 2585, + "valid_targets_mean": 4386.4, + "valid_targets_min": 325 + }, + { + "epoch": 4.111111111111111, + "grad_norm": 0.4675580885936456, + "learning_rate": 1.7418907552418597e-05, + "loss": 0.1505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15302041172981262, + "step": 2590, + "valid_targets_mean": 5458.4, + "valid_targets_min": 2918 + }, + { + "epoch": 4.119047619047619, + "grad_norm": 0.43934364103885165, + "learning_rate": 1.7340436637346315e-05, + "loss": 0.1473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14084570109844208, + "step": 2595, + "valid_targets_mean": 4983.3, + "valid_targets_min": 1133 + }, + { + "epoch": 4.1269841269841265, + "grad_norm": 0.4164892763919611, + "learning_rate": 1.726200737921079e-05, + "loss": 0.1429, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12312793731689453, + "step": 2600, + "valid_targets_mean": 6296.2, + "valid_targets_min": 596 + }, + { + "epoch": 4.134920634920635, + "grad_norm": 0.4532045098767217, + "learning_rate": 1.718362100645527e-05, + "loss": 0.1453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13698981702327728, + "step": 2605, + "valid_targets_mean": 4957.6, + "valid_targets_min": 1130 + }, + { + "epoch": 4.142857142857143, + "grad_norm": 0.4006124579380861, + "learning_rate": 1.710527874685129e-05, + "loss": 0.1497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13815411925315857, + "step": 2610, + "valid_targets_mean": 6126.5, + "valid_targets_min": 855 + }, + { + "epoch": 4.150793650793651, + "grad_norm": 0.4714675135702414, + "learning_rate": 1.702698182747942e-05, + "loss": 0.16, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14571262896060944, + "step": 2615, + "valid_targets_mean": 4147.3, + "valid_targets_min": 284 + }, + { + "epoch": 4.158730158730159, + "grad_norm": 0.42920831749050425, + "learning_rate": 1.6948731474710075e-05, + "loss": 0.1328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13257333636283875, + "step": 2620, + "valid_targets_mean": 5335.2, + "valid_targets_min": 618 + }, + { + "epoch": 4.166666666666667, + "grad_norm": 0.48356837750736714, + "learning_rate": 1.68705289141843e-05, + "loss": 0.1541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1508401334285736, + "step": 2625, + "valid_targets_mean": 6277.6, + "valid_targets_min": 2995 + }, + { + "epoch": 4.174603174603175, + "grad_norm": 0.4755928358851968, + "learning_rate": 1.679237537079454e-05, + "loss": 0.1413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14255402982234955, + "step": 2630, + "valid_targets_mean": 4744.4, + "valid_targets_min": 466 + }, + { + "epoch": 4.182539682539683, + "grad_norm": 0.41585215382481877, + "learning_rate": 1.6714272068665526e-05, + "loss": 0.1503, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13894084095954895, + "step": 2635, + "valid_targets_mean": 5902.4, + "valid_targets_min": 838 + }, + { + "epoch": 4.190476190476191, + "grad_norm": 0.4189696451329358, + "learning_rate": 1.663622023113501e-05, + "loss": 0.1598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1487642228603363, + "step": 2640, + "valid_targets_mean": 6358.3, + "valid_targets_min": 3083 + }, + { + "epoch": 4.198412698412699, + "grad_norm": 0.44087488345238157, + "learning_rate": 1.655822108073467e-05, + "loss": 0.1503, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14662505686283112, + "step": 2645, + "valid_targets_mean": 5523.2, + "valid_targets_min": 1561 + }, + { + "epoch": 4.2063492063492065, + "grad_norm": 0.47060099939704136, + "learning_rate": 1.648027583917095e-05, + "loss": 0.1427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13752037286758423, + "step": 2650, + "valid_targets_mean": 5949.6, + "valid_targets_min": 2002 + }, + { + "epoch": 4.214285714285714, + "grad_norm": 0.4223944809526827, + "learning_rate": 1.640238572730591e-05, + "loss": 0.1464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14241717755794525, + "step": 2655, + "valid_targets_mean": 5939.1, + "valid_targets_min": 1808 + }, + { + "epoch": 4.222222222222222, + "grad_norm": 0.3989848199161138, + "learning_rate": 1.632455196513809e-05, + "loss": 0.1457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15084542334079742, + "step": 2660, + "valid_targets_mean": 6282.1, + "valid_targets_min": 3096 + }, + { + "epoch": 4.23015873015873, + "grad_norm": 0.4334279841268595, + "learning_rate": 1.624677577178345e-05, + "loss": 0.1518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15273742377758026, + "step": 2665, + "valid_targets_mean": 5628.8, + "valid_targets_min": 2067 + }, + { + "epoch": 4.238095238095238, + "grad_norm": 0.5090603717967935, + "learning_rate": 1.616905836545624e-05, + "loss": 0.1463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14674241840839386, + "step": 2670, + "valid_targets_mean": 4538.6, + "valid_targets_min": 294 + }, + { + "epoch": 4.246031746031746, + "grad_norm": 0.40233869436893316, + "learning_rate": 1.6091400963449894e-05, + "loss": 0.1499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16291236877441406, + "step": 2675, + "valid_targets_mean": 5911.8, + "valid_targets_min": 1450 + }, + { + "epoch": 4.253968253968254, + "grad_norm": 0.46919428409154407, + "learning_rate": 1.6013804782118043e-05, + "loss": 0.1523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15669263899326324, + "step": 2680, + "valid_targets_mean": 4653.7, + "valid_targets_min": 1195 + }, + { + "epoch": 4.261904761904762, + "grad_norm": 0.46975419177737526, + "learning_rate": 1.5936271036855372e-05, + "loss": 0.155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15147748589515686, + "step": 2685, + "valid_targets_mean": 6341.4, + "valid_targets_min": 3029 + }, + { + "epoch": 4.26984126984127, + "grad_norm": 0.416023668049094, + "learning_rate": 1.585880094207864e-05, + "loss": 0.1493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15425267815589905, + "step": 2690, + "valid_targets_mean": 5568.8, + "valid_targets_min": 2137 + }, + { + "epoch": 4.277777777777778, + "grad_norm": 0.48941294002103736, + "learning_rate": 1.5781395711207664e-05, + "loss": 0.1483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16209441423416138, + "step": 2695, + "valid_targets_mean": 5182.4, + "valid_targets_min": 3065 + }, + { + "epoch": 4.285714285714286, + "grad_norm": 0.4233678017768594, + "learning_rate": 1.5704056556646255e-05, + "loss": 0.1486, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15272748470306396, + "step": 2700, + "valid_targets_mean": 6165.9, + "valid_targets_min": 289 + }, + { + "epoch": 4.2936507936507935, + "grad_norm": 0.4883272680118086, + "learning_rate": 1.562678468976329e-05, + "loss": 0.1558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14731578528881073, + "step": 2705, + "valid_targets_mean": 3793.1, + "valid_targets_min": 268 + }, + { + "epoch": 4.301587301587301, + "grad_norm": 0.4278181836239168, + "learning_rate": 1.5549581320873715e-05, + "loss": 0.1442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14064767956733704, + "step": 2710, + "valid_targets_mean": 6504.5, + "valid_targets_min": 2638 + }, + { + "epoch": 4.309523809523809, + "grad_norm": 0.44190091334584447, + "learning_rate": 1.5472447659219573e-05, + "loss": 0.1501, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1606476753950119, + "step": 2715, + "valid_targets_mean": 5205.2, + "valid_targets_min": 1350 + }, + { + "epoch": 4.317460317460317, + "grad_norm": 0.4287418443341925, + "learning_rate": 1.5395384912951096e-05, + "loss": 0.1558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.143661230802536, + "step": 2720, + "valid_targets_mean": 5759.1, + "valid_targets_min": 539 + }, + { + "epoch": 4.325396825396825, + "grad_norm": 0.431052688233739, + "learning_rate": 1.531839428910774e-05, + "loss": 0.1431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13569197058677673, + "step": 2725, + "valid_targets_mean": 5779.1, + "valid_targets_min": 2902 + }, + { + "epoch": 4.333333333333333, + "grad_norm": 0.4462658898939222, + "learning_rate": 1.5241476993599318e-05, + "loss": 0.1399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15712681412696838, + "step": 2730, + "valid_targets_mean": 5659.5, + "valid_targets_min": 719 + }, + { + "epoch": 4.341269841269841, + "grad_norm": 0.4365063885634691, + "learning_rate": 1.5164634231187106e-05, + "loss": 0.1418, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14231045544147491, + "step": 2735, + "valid_targets_mean": 5411.6, + "valid_targets_min": 1293 + }, + { + "epoch": 4.349206349206349, + "grad_norm": 0.3675606877485401, + "learning_rate": 1.5087867205464933e-05, + "loss": 0.1409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12964603304862976, + "step": 2740, + "valid_targets_mean": 6364.4, + "valid_targets_min": 281 + }, + { + "epoch": 4.357142857142857, + "grad_norm": 0.44160911003260395, + "learning_rate": 1.5011177118840376e-05, + "loss": 0.1414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1406039297580719, + "step": 2745, + "valid_targets_mean": 5633.9, + "valid_targets_min": 403 + }, + { + "epoch": 4.365079365079365, + "grad_norm": 0.45789145016787364, + "learning_rate": 1.4934565172515917e-05, + "loss": 0.1638, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16867199540138245, + "step": 2750, + "valid_targets_mean": 4970.6, + "valid_targets_min": 2561 + }, + { + "epoch": 4.3730158730158735, + "grad_norm": 0.41734019364461133, + "learning_rate": 1.4858032566470107e-05, + "loss": 0.1429, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14065919816493988, + "step": 2755, + "valid_targets_mean": 6324.4, + "valid_targets_min": 2455 + }, + { + "epoch": 4.380952380952381, + "grad_norm": 0.40415096511524873, + "learning_rate": 1.4781580499438794e-05, + "loss": 0.1518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13654825091362, + "step": 2760, + "valid_targets_mean": 5964.2, + "valid_targets_min": 300 + }, + { + "epoch": 4.388888888888889, + "grad_norm": 0.40778882084936363, + "learning_rate": 1.4705210168896327e-05, + "loss": 0.1517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12639005482196808, + "step": 2765, + "valid_targets_mean": 5996.9, + "valid_targets_min": 2114 + }, + { + "epoch": 4.396825396825397, + "grad_norm": 0.44063352236267495, + "learning_rate": 1.462892277103681e-05, + "loss": 0.1474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14338403940200806, + "step": 2770, + "valid_targets_mean": 5152.0, + "valid_targets_min": 487 + }, + { + "epoch": 4.404761904761905, + "grad_norm": 0.3874565040882736, + "learning_rate": 1.455271950075539e-05, + "loss": 0.1383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14000162482261658, + "step": 2775, + "valid_targets_mean": 5943.4, + "valid_targets_min": 3591 + }, + { + "epoch": 4.412698412698413, + "grad_norm": 0.3941349722942052, + "learning_rate": 1.4476601551629493e-05, + "loss": 0.1488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1478402018547058, + "step": 2780, + "valid_targets_mean": 6708.7, + "valid_targets_min": 3502 + }, + { + "epoch": 4.420634920634921, + "grad_norm": 0.41875630131970193, + "learning_rate": 1.4400570115900147e-05, + "loss": 0.1636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15849897265434265, + "step": 2785, + "valid_targets_mean": 5522.6, + "valid_targets_min": 865 + }, + { + "epoch": 4.428571428571429, + "grad_norm": 0.5412264352868692, + "learning_rate": 1.4324626384453345e-05, + "loss": 0.1476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12859734892845154, + "step": 2790, + "valid_targets_mean": 4754.4, + "valid_targets_min": 1324 + }, + { + "epoch": 4.436507936507937, + "grad_norm": 0.42316575140054596, + "learning_rate": 1.4248771546801339e-05, + "loss": 0.1504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14226463437080383, + "step": 2795, + "valid_targets_mean": 5423.4, + "valid_targets_min": 776 + }, + { + "epoch": 4.444444444444445, + "grad_norm": 0.431013383627872, + "learning_rate": 1.4173006791064023e-05, + "loss": 0.1461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14574524760246277, + "step": 2800, + "valid_targets_mean": 5876.1, + "valid_targets_min": 2317 + }, + { + "epoch": 4.4523809523809526, + "grad_norm": 0.4037541081664408, + "learning_rate": 1.4097333303950368e-05, + "loss": 0.153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1440451443195343, + "step": 2805, + "valid_targets_mean": 5450.2, + "valid_targets_min": 1747 + }, + { + "epoch": 4.4603174603174605, + "grad_norm": 0.3978151834112168, + "learning_rate": 1.4021752270739759e-05, + "loss": 0.1533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14280155301094055, + "step": 2810, + "valid_targets_mean": 6047.3, + "valid_targets_min": 2885 + }, + { + "epoch": 4.468253968253968, + "grad_norm": 0.39606766776930213, + "learning_rate": 1.3946264875263485e-05, + "loss": 0.1494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13499273359775543, + "step": 2815, + "valid_targets_mean": 5398.1, + "valid_targets_min": 881 + }, + { + "epoch": 4.476190476190476, + "grad_norm": 0.41951195439714245, + "learning_rate": 1.3870872299886184e-05, + "loss": 0.1522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15680038928985596, + "step": 2820, + "valid_targets_mean": 6016.8, + "valid_targets_min": 953 + }, + { + "epoch": 4.484126984126984, + "grad_norm": 0.6264261787683508, + "learning_rate": 1.3795575725487303e-05, + "loss": 0.1435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16164864599704742, + "step": 2825, + "valid_targets_mean": 5240.9, + "valid_targets_min": 1700 + }, + { + "epoch": 4.492063492063492, + "grad_norm": 0.8311634034364538, + "learning_rate": 1.3720376331442652e-05, + "loss": 0.1527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18453676998615265, + "step": 2830, + "valid_targets_mean": 6361.3, + "valid_targets_min": 3395 + }, + { + "epoch": 4.5, + "grad_norm": 0.42732218297283503, + "learning_rate": 1.364527529560586e-05, + "loss": 0.1537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15269696712493896, + "step": 2835, + "valid_targets_mean": 5456.8, + "valid_targets_min": 297 + }, + { + "epoch": 4.507936507936508, + "grad_norm": 0.39922258358961354, + "learning_rate": 1.3570273794289978e-05, + "loss": 0.1582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15746167302131653, + "step": 2840, + "valid_targets_mean": 5865.9, + "valid_targets_min": 1646 + }, + { + "epoch": 4.515873015873016, + "grad_norm": 0.4249607848655103, + "learning_rate": 1.3495373002249061e-05, + "loss": 0.1431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14502134919166565, + "step": 2845, + "valid_targets_mean": 5561.5, + "valid_targets_min": 2046 + }, + { + "epoch": 4.523809523809524, + "grad_norm": 0.45992538161194857, + "learning_rate": 1.3420574092659713e-05, + "loss": 0.1507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13943564891815186, + "step": 2850, + "valid_targets_mean": 4809.2, + "valid_targets_min": 299 + }, + { + "epoch": 4.531746031746032, + "grad_norm": 0.4849743503805474, + "learning_rate": 1.3345878237102766e-05, + "loss": 0.1423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.140847310423851, + "step": 2855, + "valid_targets_mean": 4423.5, + "valid_targets_min": 566 + }, + { + "epoch": 4.5396825396825395, + "grad_norm": 0.4337324469927627, + "learning_rate": 1.3271286605544906e-05, + "loss": 0.1446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1428968906402588, + "step": 2860, + "valid_targets_mean": 5831.9, + "valid_targets_min": 1871 + }, + { + "epoch": 4.5476190476190474, + "grad_norm": 0.46560324944354126, + "learning_rate": 1.3196800366320357e-05, + "loss": 0.1486, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14274823665618896, + "step": 2865, + "valid_targets_mean": 4730.6, + "valid_targets_min": 283 + }, + { + "epoch": 4.555555555555555, + "grad_norm": 0.44153707002684167, + "learning_rate": 1.3122420686112554e-05, + "loss": 0.1528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14780506491661072, + "step": 2870, + "valid_targets_mean": 4814.2, + "valid_targets_min": 604 + }, + { + "epoch": 4.563492063492063, + "grad_norm": 0.4539431427613466, + "learning_rate": 1.3048148729935917e-05, + "loss": 0.1518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18015024065971375, + "step": 2875, + "valid_targets_mean": 5631.1, + "valid_targets_min": 275 + }, + { + "epoch": 4.571428571428571, + "grad_norm": 0.42536670553586153, + "learning_rate": 1.297398566111756e-05, + "loss": 0.1435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14803089201450348, + "step": 2880, + "valid_targets_mean": 5638.5, + "valid_targets_min": 2487 + }, + { + "epoch": 4.579365079365079, + "grad_norm": 0.42937048670828487, + "learning_rate": 1.2899932641279082e-05, + "loss": 0.149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13798286020755768, + "step": 2885, + "valid_targets_mean": 4947.8, + "valid_targets_min": 299 + }, + { + "epoch": 4.587301587301587, + "grad_norm": 0.4404250376238387, + "learning_rate": 1.2825990830318395e-05, + "loss": 0.1533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17519637942314148, + "step": 2890, + "valid_targets_mean": 5130.4, + "valid_targets_min": 1535 + }, + { + "epoch": 4.595238095238095, + "grad_norm": 0.4758481215827844, + "learning_rate": 1.2752161386391526e-05, + "loss": 0.1427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13737112283706665, + "step": 2895, + "valid_targets_mean": 4636.4, + "valid_targets_min": 293 + }, + { + "epoch": 4.603174603174603, + "grad_norm": 0.4767645902681976, + "learning_rate": 1.2678445465894491e-05, + "loss": 0.1561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16972827911376953, + "step": 2900, + "valid_targets_mean": 4518.6, + "valid_targets_min": 1195 + }, + { + "epoch": 4.611111111111111, + "grad_norm": 0.4257119901155639, + "learning_rate": 1.2604844223445181e-05, + "loss": 0.1599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15088322758674622, + "step": 2905, + "valid_targets_mean": 5354.5, + "valid_targets_min": 870 + }, + { + "epoch": 4.619047619047619, + "grad_norm": 0.42846734809049425, + "learning_rate": 1.2531358811865268e-05, + "loss": 0.1488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15017569065093994, + "step": 2910, + "valid_targets_mean": 5312.0, + "valid_targets_min": 737 + }, + { + "epoch": 4.6269841269841265, + "grad_norm": 0.4925025724969061, + "learning_rate": 1.2457990382162173e-05, + "loss": 0.1437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1452239453792572, + "step": 2915, + "valid_targets_mean": 4835.2, + "valid_targets_min": 656 + }, + { + "epoch": 4.634920634920634, + "grad_norm": 0.4662611029234124, + "learning_rate": 1.238474008351101e-05, + "loss": 0.1533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15926414728164673, + "step": 2920, + "valid_targets_mean": 5120.9, + "valid_targets_min": 674 + }, + { + "epoch": 4.642857142857143, + "grad_norm": 0.4322145630104494, + "learning_rate": 1.2311609063236594e-05, + "loss": 0.1413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1390167623758316, + "step": 2925, + "valid_targets_mean": 4672.6, + "valid_targets_min": 1746 + }, + { + "epoch": 4.650793650793651, + "grad_norm": 0.4130969360637945, + "learning_rate": 1.2238598466795493e-05, + "loss": 0.1386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12715992331504822, + "step": 2930, + "valid_targets_mean": 5706.3, + "valid_targets_min": 1765 + }, + { + "epoch": 4.658730158730159, + "grad_norm": 0.3829382672782595, + "learning_rate": 1.2165709437758042e-05, + "loss": 0.1426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14774689078330994, + "step": 2935, + "valid_targets_mean": 6549.7, + "valid_targets_min": 3799 + }, + { + "epoch": 4.666666666666667, + "grad_norm": 0.38939915661022567, + "learning_rate": 1.209294311779047e-05, + "loss": 0.1519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14104187488555908, + "step": 2940, + "valid_targets_mean": 6312.2, + "valid_targets_min": 2275 + }, + { + "epoch": 4.674603174603175, + "grad_norm": 0.43181417781915576, + "learning_rate": 1.2020300646637018e-05, + "loss": 0.1499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11396326869726181, + "step": 2945, + "valid_targets_mean": 5581.9, + "valid_targets_min": 260 + }, + { + "epoch": 4.682539682539683, + "grad_norm": 0.44410129386120123, + "learning_rate": 1.1947783162102043e-05, + "loss": 0.1474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1418922394514084, + "step": 2950, + "valid_targets_mean": 4655.8, + "valid_targets_min": 693 + }, + { + "epoch": 4.690476190476191, + "grad_norm": 0.48458127033747794, + "learning_rate": 1.1875391800032248e-05, + "loss": 0.1577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1841154396533966, + "step": 2955, + "valid_targets_mean": 4632.9, + "valid_targets_min": 313 + }, + { + "epoch": 4.698412698412699, + "grad_norm": 0.4032339427441577, + "learning_rate": 1.1803127694298873e-05, + "loss": 0.1426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14213016629219055, + "step": 2960, + "valid_targets_mean": 5584.8, + "valid_targets_min": 934 + }, + { + "epoch": 4.7063492063492065, + "grad_norm": 0.4419975398132066, + "learning_rate": 1.173099197677992e-05, + "loss": 0.1437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1430945247411728, + "step": 2965, + "valid_targets_mean": 5749.8, + "valid_targets_min": 2081 + }, + { + "epoch": 4.714285714285714, + "grad_norm": 0.43503178571092443, + "learning_rate": 1.1658985777342458e-05, + "loss": 0.1449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14752447605133057, + "step": 2970, + "valid_targets_mean": 5720.0, + "valid_targets_min": 1862 + }, + { + "epoch": 4.722222222222222, + "grad_norm": 0.42023994518841934, + "learning_rate": 1.1587110223824874e-05, + "loss": 0.1585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15875454246997833, + "step": 2975, + "valid_targets_mean": 5639.4, + "valid_targets_min": 1270 + }, + { + "epoch": 4.73015873015873, + "grad_norm": 0.37363144454032915, + "learning_rate": 1.151536644201925e-05, + "loss": 0.1351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11643218994140625, + "step": 2980, + "valid_targets_mean": 5878.9, + "valid_targets_min": 1841 + }, + { + "epoch": 4.738095238095238, + "grad_norm": 0.44030969849082424, + "learning_rate": 1.1443755555653751e-05, + "loss": 0.1461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15411710739135742, + "step": 2985, + "valid_targets_mean": 4914.2, + "valid_targets_min": 1221 + }, + { + "epoch": 4.746031746031746, + "grad_norm": 0.41262993153981603, + "learning_rate": 1.1372278686374935e-05, + "loss": 0.1457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1678827404975891, + "step": 2990, + "valid_targets_mean": 5873.3, + "valid_targets_min": 1320 + }, + { + "epoch": 4.753968253968254, + "grad_norm": 0.4311749330104684, + "learning_rate": 1.1300936953730273e-05, + "loss": 0.1535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.143174409866333, + "step": 2995, + "valid_targets_mean": 5119.6, + "valid_targets_min": 752 + }, + { + "epoch": 4.761904761904762, + "grad_norm": 0.361954540458806, + "learning_rate": 1.1229731475150594e-05, + "loss": 0.1523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13922378420829773, + "step": 3000, + "valid_targets_mean": 6611.1, + "valid_targets_min": 3655 + }, + { + "epoch": 4.76984126984127, + "grad_norm": 0.3966366790173581, + "learning_rate": 1.1158663365932529e-05, + "loss": 0.1448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15058225393295288, + "step": 3005, + "valid_targets_mean": 5934.8, + "valid_targets_min": 3133 + }, + { + "epoch": 4.777777777777778, + "grad_norm": 0.4612543471363477, + "learning_rate": 1.1087733739221109e-05, + "loss": 0.15, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15629416704177856, + "step": 3010, + "valid_targets_mean": 4712.2, + "valid_targets_min": 285 + }, + { + "epoch": 4.785714285714286, + "grad_norm": 0.38989272621213816, + "learning_rate": 1.1016943705992311e-05, + "loss": 0.1496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1398439258337021, + "step": 3015, + "valid_targets_mean": 6174.9, + "valid_targets_min": 2064 + }, + { + "epoch": 4.7936507936507935, + "grad_norm": 0.4371800342017838, + "learning_rate": 1.0946294375035639e-05, + "loss": 0.1503, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1522529125213623, + "step": 3020, + "valid_targets_mean": 4937.9, + "valid_targets_min": 820 + }, + { + "epoch": 4.801587301587301, + "grad_norm": 0.5205141553470364, + "learning_rate": 1.087578685293674e-05, + "loss": 0.1497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15628096461296082, + "step": 3025, + "valid_targets_mean": 5111.9, + "valid_targets_min": 295 + }, + { + "epoch": 4.809523809523809, + "grad_norm": 0.4077302471049976, + "learning_rate": 1.080542224406015e-05, + "loss": 0.1458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14736653864383698, + "step": 3030, + "valid_targets_mean": 5978.1, + "valid_targets_min": 522 + }, + { + "epoch": 4.817460317460317, + "grad_norm": 0.4091248100895989, + "learning_rate": 1.0735201650531915e-05, + "loss": 0.1473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1352832168340683, + "step": 3035, + "valid_targets_mean": 5664.2, + "valid_targets_min": 2353 + }, + { + "epoch": 4.825396825396825, + "grad_norm": 0.3968540500697524, + "learning_rate": 1.066512617222235e-05, + "loss": 0.1401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12752054631710052, + "step": 3040, + "valid_targets_mean": 6093.6, + "valid_targets_min": 1203 + }, + { + "epoch": 4.833333333333333, + "grad_norm": 0.3642854376590935, + "learning_rate": 1.059519690672884e-05, + "loss": 0.1447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15932929515838623, + "step": 3045, + "valid_targets_mean": 7128.1, + "valid_targets_min": 2558 + }, + { + "epoch": 4.841269841269841, + "grad_norm": 0.38602348967427114, + "learning_rate": 1.0525414949358614e-05, + "loss": 0.1473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1197904720902443, + "step": 3050, + "valid_targets_mean": 5888.0, + "valid_targets_min": 2560 + }, + { + "epoch": 4.849206349206349, + "grad_norm": 0.510022835675847, + "learning_rate": 1.0455781393111613e-05, + "loss": 0.1471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14147284626960754, + "step": 3055, + "valid_targets_mean": 5739.3, + "valid_targets_min": 1793 + }, + { + "epoch": 4.857142857142857, + "grad_norm": 0.6751258587414498, + "learning_rate": 1.0386297328663353e-05, + "loss": 0.1507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16436898708343506, + "step": 3060, + "valid_targets_mean": 6290.2, + "valid_targets_min": 440 + }, + { + "epoch": 4.865079365079366, + "grad_norm": 0.4424084854510193, + "learning_rate": 1.0316963844347843e-05, + "loss": 0.1522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15425124764442444, + "step": 3065, + "valid_targets_mean": 5844.6, + "valid_targets_min": 2170 + }, + { + "epoch": 4.8730158730158735, + "grad_norm": 0.4168794838811193, + "learning_rate": 1.0247782026140576e-05, + "loss": 0.1461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13188251852989197, + "step": 3070, + "valid_targets_mean": 5396.5, + "valid_targets_min": 1348 + }, + { + "epoch": 4.880952380952381, + "grad_norm": 0.49315142334278483, + "learning_rate": 1.017875295764144e-05, + "loss": 0.1503, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13740713894367218, + "step": 3075, + "valid_targets_mean": 5882.2, + "valid_targets_min": 538 + }, + { + "epoch": 4.888888888888889, + "grad_norm": 0.4543689569217225, + "learning_rate": 1.0109877720057818e-05, + "loss": 0.1567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16015523672103882, + "step": 3080, + "valid_targets_mean": 5451.6, + "valid_targets_min": 2056 + }, + { + "epoch": 4.896825396825397, + "grad_norm": 0.36802504019714555, + "learning_rate": 1.0041157392187651e-05, + "loss": 0.145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11829912662506104, + "step": 3085, + "valid_targets_mean": 5831.1, + "valid_targets_min": 2830 + }, + { + "epoch": 4.904761904761905, + "grad_norm": 0.3966134482676806, + "learning_rate": 9.972593050402471e-06, + "loss": 0.1412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14243818819522858, + "step": 3090, + "valid_targets_mean": 6406.8, + "valid_targets_min": 1428 + }, + { + "epoch": 4.912698412698413, + "grad_norm": 0.4183961944811586, + "learning_rate": 9.904185768630612e-06, + "loss": 0.1418, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16794633865356445, + "step": 3095, + "valid_targets_mean": 6009.8, + "valid_targets_min": 3070 + }, + { + "epoch": 4.920634920634921, + "grad_norm": 0.44212723748685295, + "learning_rate": 9.835936618340377e-06, + "loss": 0.1541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14964662492275238, + "step": 3100, + "valid_targets_mean": 4590.1, + "valid_targets_min": 1122 + }, + { + "epoch": 4.928571428571429, + "grad_norm": 0.41070340267056166, + "learning_rate": 9.76784666852323e-06, + "loss": 0.1447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12387900054454803, + "step": 3105, + "valid_targets_mean": 6402.7, + "valid_targets_min": 3540 + }, + { + "epoch": 4.936507936507937, + "grad_norm": 0.44233711675764437, + "learning_rate": 9.699916985677062e-06, + "loss": 0.1482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1574784517288208, + "step": 3110, + "valid_targets_mean": 4382.4, + "valid_targets_min": 920 + }, + { + "epoch": 4.944444444444445, + "grad_norm": 0.46887051592133433, + "learning_rate": 9.6321486337895e-06, + "loss": 0.151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15827694535255432, + "step": 3115, + "valid_targets_mean": 5222.6, + "valid_targets_min": 1313 + }, + { + "epoch": 4.9523809523809526, + "grad_norm": 0.41511980613828253, + "learning_rate": 9.564542674321228e-06, + "loss": 0.1479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14313764870166779, + "step": 3120, + "valid_targets_mean": 5663.2, + "valid_targets_min": 800 + }, + { + "epoch": 4.9603174603174605, + "grad_norm": 0.4621864142346036, + "learning_rate": 9.49710016618937e-06, + "loss": 0.1517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15151920914649963, + "step": 3125, + "valid_targets_mean": 5380.6, + "valid_targets_min": 313 + }, + { + "epoch": 4.968253968253968, + "grad_norm": 0.48527019717345327, + "learning_rate": 9.429822165750893e-06, + "loss": 0.1509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14661577343940735, + "step": 3130, + "valid_targets_mean": 4398.2, + "valid_targets_min": 633 + }, + { + "epoch": 4.976190476190476, + "grad_norm": 0.3831996919641483, + "learning_rate": 9.36270972678607e-06, + "loss": 0.1398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13461735844612122, + "step": 3135, + "valid_targets_mean": 6052.1, + "valid_targets_min": 1578 + }, + { + "epoch": 4.984126984126984, + "grad_norm": 0.4100072649060219, + "learning_rate": 9.295763900481977e-06, + "loss": 0.1449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12445595860481262, + "step": 3140, + "valid_targets_mean": 5427.4, + "valid_targets_min": 307 + }, + { + "epoch": 4.992063492063492, + "grad_norm": 0.46254552204793853, + "learning_rate": 9.22898573541602e-06, + "loss": 0.1485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16388916969299316, + "step": 3145, + "valid_targets_mean": 5861.9, + "valid_targets_min": 2467 + }, + { + "epoch": 5.0, + "grad_norm": 0.40795669884699387, + "learning_rate": 9.162376277539513e-06, + "loss": 0.1439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15090236067771912, + "step": 3150, + "valid_targets_mean": 5592.5, + "valid_targets_min": 1990 + }, + { + "epoch": 5.007936507936508, + "grad_norm": 0.3857823068068869, + "learning_rate": 9.095936570161301e-06, + "loss": 0.138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14320944249629974, + "step": 3155, + "valid_targets_mean": 5639.6, + "valid_targets_min": 260 + }, + { + "epoch": 5.015873015873016, + "grad_norm": 0.5435449453461013, + "learning_rate": 9.029667653931411e-06, + "loss": 0.1325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1401306390762329, + "step": 3160, + "valid_targets_mean": 5760.2, + "valid_targets_min": 371 + }, + { + "epoch": 5.023809523809524, + "grad_norm": 0.6789857642195729, + "learning_rate": 8.96357056682475e-06, + "loss": 0.1367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12801998853683472, + "step": 3165, + "valid_targets_mean": 5610.3, + "valid_targets_min": 522 + }, + { + "epoch": 5.031746031746032, + "grad_norm": 0.38873846063838485, + "learning_rate": 8.897646344124882e-06, + "loss": 0.1304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11531123518943787, + "step": 3170, + "valid_targets_mean": 6343.7, + "valid_targets_min": 2461 + }, + { + "epoch": 5.0396825396825395, + "grad_norm": 0.4632360566976383, + "learning_rate": 8.83189601840773e-06, + "loss": 0.1416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14523249864578247, + "step": 3175, + "valid_targets_mean": 5589.1, + "valid_targets_min": 1310 + }, + { + "epoch": 5.0476190476190474, + "grad_norm": 0.44480606265136036, + "learning_rate": 8.766320619525511e-06, + "loss": 0.1405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1437515914440155, + "step": 3180, + "valid_targets_mean": 5137.6, + "valid_targets_min": 1450 + }, + { + "epoch": 5.055555555555555, + "grad_norm": 0.40620681435947337, + "learning_rate": 8.700921174590525e-06, + "loss": 0.1357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13447782397270203, + "step": 3185, + "valid_targets_mean": 6346.1, + "valid_targets_min": 4150 + }, + { + "epoch": 5.063492063492063, + "grad_norm": 0.46662620571823504, + "learning_rate": 8.63569870795907e-06, + "loss": 0.138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14662285149097443, + "step": 3190, + "valid_targets_mean": 6060.2, + "valid_targets_min": 1308 + }, + { + "epoch": 5.071428571428571, + "grad_norm": 0.4654130995805731, + "learning_rate": 8.570654241215466e-06, + "loss": 0.1393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14438563585281372, + "step": 3195, + "valid_targets_mean": 5318.1, + "valid_targets_min": 1095 + }, + { + "epoch": 5.079365079365079, + "grad_norm": 0.40240677129462593, + "learning_rate": 8.505788793155978e-06, + "loss": 0.1285, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11840318888425827, + "step": 3200, + "valid_targets_mean": 6147.7, + "valid_targets_min": 403 + }, + { + "epoch": 5.087301587301587, + "grad_norm": 0.5552645220273652, + "learning_rate": 8.441103379772893e-06, + "loss": 0.1433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15520301461219788, + "step": 3205, + "valid_targets_mean": 5761.9, + "valid_targets_min": 1220 + }, + { + "epoch": 5.095238095238095, + "grad_norm": 0.4847744229611747, + "learning_rate": 8.376599014238605e-06, + "loss": 0.1524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12781260907649994, + "step": 3210, + "valid_targets_mean": 5049.5, + "valid_targets_min": 2182 + }, + { + "epoch": 5.103174603174603, + "grad_norm": 0.4448058887555173, + "learning_rate": 8.312276706889738e-06, + "loss": 0.1409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14810214936733246, + "step": 3215, + "valid_targets_mean": 4893.2, + "valid_targets_min": 712 + }, + { + "epoch": 5.111111111111111, + "grad_norm": 0.3769447002552102, + "learning_rate": 8.24813746521133e-06, + "loss": 0.1442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11988615989685059, + "step": 3220, + "valid_targets_mean": 6354.1, + "valid_targets_min": 552 + }, + { + "epoch": 5.119047619047619, + "grad_norm": 0.40775763269992454, + "learning_rate": 8.184182293821046e-06, + "loss": 0.1335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13774478435516357, + "step": 3225, + "valid_targets_mean": 6155.4, + "valid_targets_min": 1769 + }, + { + "epoch": 5.1269841269841265, + "grad_norm": 0.4621791306803943, + "learning_rate": 8.120412194453442e-06, + "loss": 0.1297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13131381571292877, + "step": 3230, + "valid_targets_mean": 5662.6, + "valid_targets_min": 1704 + }, + { + "epoch": 5.134920634920635, + "grad_norm": 0.47477190317994294, + "learning_rate": 8.056828165944282e-06, + "loss": 0.1351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11765207350254059, + "step": 3235, + "valid_targets_mean": 4793.4, + "valid_targets_min": 556 + }, + { + "epoch": 5.142857142857143, + "grad_norm": 0.4211817590749248, + "learning_rate": 7.993431204214883e-06, + "loss": 0.144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.140112042427063, + "step": 3240, + "valid_targets_mean": 6042.8, + "valid_targets_min": 2069 + }, + { + "epoch": 5.150793650793651, + "grad_norm": 0.4100200812051378, + "learning_rate": 7.93022230225652e-06, + "loss": 0.1364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1306135654449463, + "step": 3245, + "valid_targets_mean": 5446.9, + "valid_targets_min": 1408 + }, + { + "epoch": 5.158730158730159, + "grad_norm": 0.4654477832902761, + "learning_rate": 7.867202450114892e-06, + "loss": 0.1469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1470104455947876, + "step": 3250, + "valid_targets_mean": 4906.9, + "valid_targets_min": 483 + }, + { + "epoch": 5.166666666666667, + "grad_norm": 0.523468892447528, + "learning_rate": 7.804372634874582e-06, + "loss": 0.1356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13795553147792816, + "step": 3255, + "valid_targets_mean": 4284.5, + "valid_targets_min": 1195 + }, + { + "epoch": 5.174603174603175, + "grad_norm": 0.6078996381505655, + "learning_rate": 7.74173384064359e-06, + "loss": 0.1355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1462775468826294, + "step": 3260, + "valid_targets_mean": 5041.4, + "valid_targets_min": 1305 + }, + { + "epoch": 5.182539682539683, + "grad_norm": 0.4607675067323951, + "learning_rate": 7.679287048537987e-06, + "loss": 0.1433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1391090750694275, + "step": 3265, + "valid_targets_mean": 5745.2, + "valid_targets_min": 336 + }, + { + "epoch": 5.190476190476191, + "grad_norm": 0.4923341770949091, + "learning_rate": 7.617033236666469e-06, + "loss": 0.1331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1512683480978012, + "step": 3270, + "valid_targets_mean": 4856.6, + "valid_targets_min": 860 + }, + { + "epoch": 5.198412698412699, + "grad_norm": 0.4823935955118556, + "learning_rate": 7.55497338011506e-06, + "loss": 0.133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12928101420402527, + "step": 3275, + "valid_targets_mean": 5083.1, + "valid_targets_min": 674 + }, + { + "epoch": 5.2063492063492065, + "grad_norm": 0.4571859304803118, + "learning_rate": 7.493108450931879e-06, + "loss": 0.1409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.147546648979187, + "step": 3280, + "valid_targets_mean": 5298.1, + "valid_targets_min": 584 + }, + { + "epoch": 5.214285714285714, + "grad_norm": 0.4656323081387472, + "learning_rate": 7.4314394181118636e-06, + "loss": 0.1455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13368697464466095, + "step": 3285, + "valid_targets_mean": 5323.8, + "valid_targets_min": 1007 + }, + { + "epoch": 5.222222222222222, + "grad_norm": 0.47778320363976773, + "learning_rate": 7.369967247581611e-06, + "loss": 0.1363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14050470292568207, + "step": 3290, + "valid_targets_mean": 4988.6, + "valid_targets_min": 1255 + }, + { + "epoch": 5.23015873015873, + "grad_norm": 0.47185902185121587, + "learning_rate": 7.3086929021842575e-06, + "loss": 0.1366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14221060276031494, + "step": 3295, + "valid_targets_mean": 4921.3, + "valid_targets_min": 778 + }, + { + "epoch": 5.238095238095238, + "grad_norm": 0.40677949622794063, + "learning_rate": 7.247617341664384e-06, + "loss": 0.141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15191778540611267, + "step": 3300, + "valid_targets_mean": 6783.2, + "valid_targets_min": 2108 + }, + { + "epoch": 5.246031746031746, + "grad_norm": 0.5597275095189963, + "learning_rate": 7.186741522652994e-06, + "loss": 0.1366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1412215381860733, + "step": 3305, + "valid_targets_mean": 5306.6, + "valid_targets_min": 1796 + }, + { + "epoch": 5.253968253968254, + "grad_norm": 0.4136667615907917, + "learning_rate": 7.12606639865252e-06, + "loss": 0.1349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13404718041419983, + "step": 3310, + "valid_targets_mean": 5412.3, + "valid_targets_min": 838 + }, + { + "epoch": 5.261904761904762, + "grad_norm": 0.5072282174178516, + "learning_rate": 7.065592920021893e-06, + "loss": 0.1453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1653522104024887, + "step": 3315, + "valid_targets_mean": 5170.6, + "valid_targets_min": 1293 + }, + { + "epoch": 5.26984126984127, + "grad_norm": 0.43649425013960674, + "learning_rate": 7.005322033961679e-06, + "loss": 0.1421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14359837770462036, + "step": 3320, + "valid_targets_mean": 5731.9, + "valid_targets_min": 2041 + }, + { + "epoch": 5.277777777777778, + "grad_norm": 0.4609844808793299, + "learning_rate": 6.945254684499185e-06, + "loss": 0.1388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14887037873268127, + "step": 3325, + "valid_targets_mean": 5072.8, + "valid_targets_min": 565 + }, + { + "epoch": 5.285714285714286, + "grad_norm": 0.4423681058460165, + "learning_rate": 6.8853918124737274e-06, + "loss": 0.1378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1400473713874817, + "step": 3330, + "valid_targets_mean": 5419.5, + "valid_targets_min": 461 + }, + { + "epoch": 5.2936507936507935, + "grad_norm": 0.4833740463102983, + "learning_rate": 6.825734355521898e-06, + "loss": 0.1438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1373916119337082, + "step": 3335, + "valid_targets_mean": 5413.9, + "valid_targets_min": 285 + }, + { + "epoch": 5.301587301587301, + "grad_norm": 0.46607539050074326, + "learning_rate": 6.766283248062817e-06, + "loss": 0.1444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14503991603851318, + "step": 3340, + "valid_targets_mean": 5005.1, + "valid_targets_min": 1372 + }, + { + "epoch": 5.309523809523809, + "grad_norm": 0.46602580090763035, + "learning_rate": 6.707039421283559e-06, + "loss": 0.1362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1407843828201294, + "step": 3345, + "valid_targets_mean": 4867.7, + "valid_targets_min": 279 + }, + { + "epoch": 5.317460317460317, + "grad_norm": 0.4977380166852099, + "learning_rate": 6.648003803124559e-06, + "loss": 0.1417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18442052602767944, + "step": 3350, + "valid_targets_mean": 4517.1, + "valid_targets_min": 528 + }, + { + "epoch": 5.325396825396825, + "grad_norm": 0.45300213871690015, + "learning_rate": 6.589177318265047e-06, + "loss": 0.133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13406887650489807, + "step": 3355, + "valid_targets_mean": 5391.9, + "valid_targets_min": 501 + }, + { + "epoch": 5.333333333333333, + "grad_norm": 0.4255524573366613, + "learning_rate": 6.53056088810857e-06, + "loss": 0.1469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14955641329288483, + "step": 3360, + "valid_targets_mean": 5542.8, + "valid_targets_min": 2458 + }, + { + "epoch": 5.341269841269841, + "grad_norm": 0.5534775234199173, + "learning_rate": 6.472155430768608e-06, + "loss": 0.1378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12088638544082642, + "step": 3365, + "valid_targets_mean": 6366.4, + "valid_targets_min": 1428 + }, + { + "epoch": 5.349206349206349, + "grad_norm": 0.4364437395513567, + "learning_rate": 6.413961861054132e-06, + "loss": 0.1329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15652699768543243, + "step": 3370, + "valid_targets_mean": 5471.2, + "valid_targets_min": 591 + }, + { + "epoch": 5.357142857142857, + "grad_norm": 0.45770813360412227, + "learning_rate": 6.3559810904553095e-06, + "loss": 0.1361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14036116003990173, + "step": 3375, + "valid_targets_mean": 5520.9, + "valid_targets_min": 256 + }, + { + "epoch": 5.365079365079365, + "grad_norm": 0.4153555346953972, + "learning_rate": 6.298214027129219e-06, + "loss": 0.1341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13151246309280396, + "step": 3380, + "valid_targets_mean": 6010.9, + "valid_targets_min": 263 + }, + { + "epoch": 5.3730158730158735, + "grad_norm": 0.4451350722143499, + "learning_rate": 6.240661575885629e-06, + "loss": 0.1374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1685609072446823, + "step": 3385, + "valid_targets_mean": 5214.2, + "valid_targets_min": 763 + }, + { + "epoch": 5.380952380952381, + "grad_norm": 0.46292596088927307, + "learning_rate": 6.183324638172819e-06, + "loss": 0.1438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15203514695167542, + "step": 3390, + "valid_targets_mean": 5266.0, + "valid_targets_min": 861 + }, + { + "epoch": 5.388888888888889, + "grad_norm": 0.4676464235157195, + "learning_rate": 6.126204112063463e-06, + "loss": 0.1353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13088193535804749, + "step": 3395, + "valid_targets_mean": 4737.1, + "valid_targets_min": 1121 + }, + { + "epoch": 5.396825396825397, + "grad_norm": 0.6797962177962652, + "learning_rate": 6.069300892240564e-06, + "loss": 0.1514, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16591420769691467, + "step": 3400, + "valid_targets_mean": 4606.3, + "valid_targets_min": 377 + }, + { + "epoch": 5.404761904761905, + "grad_norm": 0.47267093685643496, + "learning_rate": 6.0126158699834625e-06, + "loss": 0.146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13353607058525085, + "step": 3405, + "valid_targets_mean": 5052.6, + "valid_targets_min": 2020 + }, + { + "epoch": 5.412698412698413, + "grad_norm": 0.428442687908954, + "learning_rate": 5.956149933153816e-06, + "loss": 0.1318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12273760139942169, + "step": 3410, + "valid_targets_mean": 5588.0, + "valid_targets_min": 2053 + }, + { + "epoch": 5.420634920634921, + "grad_norm": 0.4412367255518039, + "learning_rate": 5.899903966181751e-06, + "loss": 0.1398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14523178339004517, + "step": 3415, + "valid_targets_mean": 5505.2, + "valid_targets_min": 2042 + }, + { + "epoch": 5.428571428571429, + "grad_norm": 0.43781568976305363, + "learning_rate": 5.843878850052007e-06, + "loss": 0.1261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14099499583244324, + "step": 3420, + "valid_targets_mean": 5586.9, + "valid_targets_min": 839 + }, + { + "epoch": 5.436507936507937, + "grad_norm": 0.46059817758019467, + "learning_rate": 5.788075462290084e-06, + "loss": 0.1333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1432957500219345, + "step": 3425, + "valid_targets_mean": 4740.2, + "valid_targets_min": 705 + }, + { + "epoch": 5.444444444444445, + "grad_norm": 0.415134511085044, + "learning_rate": 5.732494676948554e-06, + "loss": 0.1303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12942150235176086, + "step": 3430, + "valid_targets_mean": 6054.6, + "valid_targets_min": 608 + }, + { + "epoch": 5.4523809523809526, + "grad_norm": 0.41001838037825894, + "learning_rate": 5.677137364593363e-06, + "loss": 0.1379, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14375941455364227, + "step": 3435, + "valid_targets_mean": 5782.4, + "valid_targets_min": 2362 + }, + { + "epoch": 5.4603174603174605, + "grad_norm": 0.4462203270648477, + "learning_rate": 5.622004392290163e-06, + "loss": 0.1409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13974609971046448, + "step": 3440, + "valid_targets_mean": 5166.1, + "valid_targets_min": 1105 + }, + { + "epoch": 5.468253968253968, + "grad_norm": 0.5621286314149347, + "learning_rate": 5.567096623590758e-06, + "loss": 0.1432, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14251437783241272, + "step": 3445, + "valid_targets_mean": 5521.8, + "valid_targets_min": 1822 + }, + { + "epoch": 5.476190476190476, + "grad_norm": 0.3951253389087575, + "learning_rate": 5.512414918519573e-06, + "loss": 0.1375, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.138666570186615, + "step": 3450, + "valid_targets_mean": 6410.9, + "valid_targets_min": 2903 + }, + { + "epoch": 5.484126984126984, + "grad_norm": 0.42978600879401685, + "learning_rate": 5.457960133560179e-06, + "loss": 0.1395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13873498141765594, + "step": 3455, + "valid_targets_mean": 5563.0, + "valid_targets_min": 2443 + }, + { + "epoch": 5.492063492063492, + "grad_norm": 0.4763755663272107, + "learning_rate": 5.403733121641883e-06, + "loss": 0.1413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14260432124137878, + "step": 3460, + "valid_targets_mean": 5387.6, + "valid_targets_min": 1680 + }, + { + "epoch": 5.5, + "grad_norm": 0.45865214891995293, + "learning_rate": 5.349734732126366e-06, + "loss": 0.1299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15055881440639496, + "step": 3465, + "valid_targets_mean": 5297.8, + "valid_targets_min": 247 + }, + { + "epoch": 5.507936507936508, + "grad_norm": 0.4739801326379873, + "learning_rate": 5.295965810794376e-06, + "loss": 0.1415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1341158002614975, + "step": 3470, + "valid_targets_mean": 4671.8, + "valid_targets_min": 1946 + }, + { + "epoch": 5.515873015873016, + "grad_norm": 0.4054624348563083, + "learning_rate": 5.2424271998324895e-06, + "loss": 0.1305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13239628076553345, + "step": 3475, + "valid_targets_mean": 5538.2, + "valid_targets_min": 2317 + }, + { + "epoch": 5.523809523809524, + "grad_norm": 0.47822327586671515, + "learning_rate": 5.189119737819912e-06, + "loss": 0.1359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13307338953018188, + "step": 3480, + "valid_targets_mean": 5326.6, + "valid_targets_min": 1927 + }, + { + "epoch": 5.531746031746032, + "grad_norm": 0.45640673731159975, + "learning_rate": 5.136044259715342e-06, + "loss": 0.1411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1569247543811798, + "step": 3485, + "valid_targets_mean": 5192.1, + "valid_targets_min": 1402 + }, + { + "epoch": 5.5396825396825395, + "grad_norm": 0.47606206142121826, + "learning_rate": 5.083201596843905e-06, + "loss": 0.1301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13270564377307892, + "step": 3490, + "valid_targets_mean": 5579.8, + "valid_targets_min": 1802 + }, + { + "epoch": 5.5476190476190474, + "grad_norm": 0.48207429223420034, + "learning_rate": 5.030592576884117e-06, + "loss": 0.1479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1469019204378128, + "step": 3495, + "valid_targets_mean": 5300.8, + "valid_targets_min": 578 + }, + { + "epoch": 5.555555555555555, + "grad_norm": 0.46920831785621236, + "learning_rate": 4.978218023854928e-06, + "loss": 0.1355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14404523372650146, + "step": 3500, + "valid_targets_mean": 4518.9, + "valid_targets_min": 229 + }, + { + "epoch": 5.563492063492063, + "grad_norm": 0.42912173167963297, + "learning_rate": 4.926078758102834e-06, + "loss": 0.1417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1521531045436859, + "step": 3505, + "valid_targets_mean": 5723.4, + "valid_targets_min": 1020 + }, + { + "epoch": 5.571428571428571, + "grad_norm": 0.503182430334019, + "learning_rate": 4.87417559628897e-06, + "loss": 0.1304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13956725597381592, + "step": 3510, + "valid_targets_mean": 5755.2, + "valid_targets_min": 342 + }, + { + "epoch": 5.579365079365079, + "grad_norm": 0.483615956667945, + "learning_rate": 4.822509351376399e-06, + "loss": 0.1484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15917867422103882, + "step": 3515, + "valid_targets_mean": 4745.8, + "valid_targets_min": 871 + }, + { + "epoch": 5.587301587301587, + "grad_norm": 0.44843746346045477, + "learning_rate": 4.7710808326173115e-06, + "loss": 0.1376, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14553232491016388, + "step": 3520, + "valid_targets_mean": 5322.7, + "valid_targets_min": 949 + }, + { + "epoch": 5.595238095238095, + "grad_norm": 0.4446428825967545, + "learning_rate": 4.719890845540385e-06, + "loss": 0.1414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1446400284767151, + "step": 3525, + "valid_targets_mean": 5464.6, + "valid_targets_min": 1876 + }, + { + "epoch": 5.603174603174603, + "grad_norm": 0.44461133087445714, + "learning_rate": 4.668940191938156e-06, + "loss": 0.1374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12743496894836426, + "step": 3530, + "valid_targets_mean": 5617.1, + "valid_targets_min": 867 + }, + { + "epoch": 5.611111111111111, + "grad_norm": 0.46893046808084937, + "learning_rate": 4.618229669854464e-06, + "loss": 0.1357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1406087577342987, + "step": 3535, + "valid_targets_mean": 5965.8, + "valid_targets_min": 2904 + }, + { + "epoch": 5.619047619047619, + "grad_norm": 0.49547441030459144, + "learning_rate": 4.567760073571947e-06, + "loss": 0.136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14891457557678223, + "step": 3540, + "valid_targets_mean": 4223.9, + "valid_targets_min": 1503 + }, + { + "epoch": 5.6269841269841265, + "grad_norm": 0.48451923979691575, + "learning_rate": 4.51753219359961e-06, + "loss": 0.1392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14008775353431702, + "step": 3545, + "valid_targets_mean": 5037.8, + "valid_targets_min": 2496 + }, + { + "epoch": 5.634920634920634, + "grad_norm": 0.4290885471458542, + "learning_rate": 4.467546816660433e-06, + "loss": 0.1383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1463758647441864, + "step": 3550, + "valid_targets_mean": 6069.6, + "valid_targets_min": 2782 + }, + { + "epoch": 5.642857142857143, + "grad_norm": 0.4422867985916354, + "learning_rate": 4.417804725679058e-06, + "loss": 0.1441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1522657573223114, + "step": 3555, + "valid_targets_mean": 7388.6, + "valid_targets_min": 2410 + }, + { + "epoch": 5.650793650793651, + "grad_norm": 0.4326771572943575, + "learning_rate": 4.368306699769518e-06, + "loss": 0.1362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13901498913764954, + "step": 3560, + "valid_targets_mean": 5687.3, + "valid_targets_min": 930 + }, + { + "epoch": 5.658730158730159, + "grad_norm": 0.5184808560608285, + "learning_rate": 4.319053514223033e-06, + "loss": 0.1301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12393245846033096, + "step": 3565, + "valid_targets_mean": 4805.2, + "valid_targets_min": 277 + }, + { + "epoch": 5.666666666666667, + "grad_norm": 0.4747228961326593, + "learning_rate": 4.270045940495879e-06, + "loss": 0.151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1519310176372528, + "step": 3570, + "valid_targets_mean": 4909.6, + "valid_targets_min": 525 + }, + { + "epoch": 5.674603174603175, + "grad_norm": 0.4817480251050087, + "learning_rate": 4.221284746197292e-06, + "loss": 0.1423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14879171550273895, + "step": 3575, + "valid_targets_mean": 5205.3, + "valid_targets_min": 512 + }, + { + "epoch": 5.682539682539683, + "grad_norm": 0.5206783763038453, + "learning_rate": 4.172770695077437e-06, + "loss": 0.1366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16022273898124695, + "step": 3580, + "valid_targets_mean": 4187.7, + "valid_targets_min": 754 + }, + { + "epoch": 5.690476190476191, + "grad_norm": 0.42530942051464743, + "learning_rate": 4.124504547015487e-06, + "loss": 0.1403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1300957351922989, + "step": 3585, + "valid_targets_mean": 6382.3, + "valid_targets_min": 2232 + }, + { + "epoch": 5.698412698412699, + "grad_norm": 0.4773846019671863, + "learning_rate": 4.0764870580076675e-06, + "loss": 0.1527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17422491312026978, + "step": 3590, + "valid_targets_mean": 5224.0, + "valid_targets_min": 1170 + }, + { + "epoch": 5.7063492063492065, + "grad_norm": 0.3861656168881239, + "learning_rate": 4.0287189801554304e-06, + "loss": 0.1436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12745806574821472, + "step": 3595, + "valid_targets_mean": 6368.2, + "valid_targets_min": 1124 + }, + { + "epoch": 5.714285714285714, + "grad_norm": 0.41849044348707654, + "learning_rate": 3.98120106165371e-06, + "loss": 0.1425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13698044419288635, + "step": 3600, + "valid_targets_mean": 5723.8, + "valid_targets_min": 2689 + }, + { + "epoch": 5.722222222222222, + "grad_norm": 0.5006534631008478, + "learning_rate": 3.933934046779164e-06, + "loss": 0.1449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15041181445121765, + "step": 3605, + "valid_targets_mean": 4726.2, + "valid_targets_min": 625 + }, + { + "epoch": 5.73015873015873, + "grad_norm": 0.5390021957931794, + "learning_rate": 3.886918675878513e-06, + "loss": 0.1352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15215769410133362, + "step": 3610, + "valid_targets_mean": 5078.9, + "valid_targets_min": 355 + }, + { + "epoch": 5.738095238095238, + "grad_norm": 0.4242218655638763, + "learning_rate": 3.840155685356983e-06, + "loss": 0.1313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12816202640533447, + "step": 3615, + "valid_targets_mean": 5724.1, + "valid_targets_min": 2290 + }, + { + "epoch": 5.746031746031746, + "grad_norm": 0.40905901981368425, + "learning_rate": 3.793645807666735e-06, + "loss": 0.1399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12471656501293182, + "step": 3620, + "valid_targets_mean": 6163.4, + "valid_targets_min": 447 + }, + { + "epoch": 5.753968253968254, + "grad_norm": 0.5134066612062572, + "learning_rate": 3.747389771295411e-06, + "loss": 0.1316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13810807466506958, + "step": 3625, + "valid_targets_mean": 5215.5, + "valid_targets_min": 320 + }, + { + "epoch": 5.761904761904762, + "grad_norm": 0.44176531273164726, + "learning_rate": 3.701388300754709e-06, + "loss": 0.149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15570278465747833, + "step": 3630, + "valid_targets_mean": 5831.4, + "valid_targets_min": 1928 + }, + { + "epoch": 5.76984126984127, + "grad_norm": 0.4552213038987976, + "learning_rate": 3.6556421165690516e-06, + "loss": 0.132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13112439215183258, + "step": 3635, + "valid_targets_mean": 5127.6, + "valid_targets_min": 1412 + }, + { + "epoch": 5.777777777777778, + "grad_norm": 0.5475459293945621, + "learning_rate": 3.610151935264288e-06, + "loss": 0.1321, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13923147320747375, + "step": 3640, + "valid_targets_mean": 5058.1, + "valid_targets_min": 1271 + }, + { + "epoch": 5.785714285714286, + "grad_norm": 0.4062383720325422, + "learning_rate": 3.5649184693564797e-06, + "loss": 0.1413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12178117781877518, + "step": 3645, + "valid_targets_mean": 6188.1, + "valid_targets_min": 2820 + }, + { + "epoch": 5.7936507936507935, + "grad_norm": 0.41210477208428037, + "learning_rate": 3.5199424273407277e-06, + "loss": 0.1329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11573171615600586, + "step": 3650, + "valid_targets_mean": 5608.1, + "valid_targets_min": 291 + }, + { + "epoch": 5.801587301587301, + "grad_norm": 0.39026902409095005, + "learning_rate": 3.4752245136801065e-06, + "loss": 0.138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11422628164291382, + "step": 3655, + "valid_targets_mean": 6121.1, + "valid_targets_min": 919 + }, + { + "epoch": 5.809523809523809, + "grad_norm": 0.44136839976284375, + "learning_rate": 3.430765428794569e-06, + "loss": 0.1426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15087169408798218, + "step": 3660, + "valid_targets_mean": 5458.3, + "valid_targets_min": 663 + }, + { + "epoch": 5.817460317460317, + "grad_norm": 0.47272488901004456, + "learning_rate": 3.3865658690500424e-06, + "loss": 0.1342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12730719149112701, + "step": 3665, + "valid_targets_mean": 5489.8, + "valid_targets_min": 266 + }, + { + "epoch": 5.825396825396825, + "grad_norm": 0.4286583074647361, + "learning_rate": 3.34262652674749e-06, + "loss": 0.1399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13272640109062195, + "step": 3670, + "valid_targets_mean": 5855.3, + "valid_targets_min": 3739 + }, + { + "epoch": 5.833333333333333, + "grad_norm": 0.4961674424548416, + "learning_rate": 3.2989480901120684e-06, + "loss": 0.1305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14517953991889954, + "step": 3675, + "valid_targets_mean": 5103.6, + "valid_targets_min": 629 + }, + { + "epoch": 5.841269841269841, + "grad_norm": 0.5038921133963972, + "learning_rate": 3.2555312432823283e-06, + "loss": 0.1381, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14607466757297516, + "step": 3680, + "valid_targets_mean": 4695.1, + "valid_targets_min": 2224 + }, + { + "epoch": 5.849206349206349, + "grad_norm": 0.4516355692524723, + "learning_rate": 3.2123766662995572e-06, + "loss": 0.1382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13942056894302368, + "step": 3685, + "valid_targets_mean": 5577.4, + "valid_targets_min": 1295 + }, + { + "epoch": 5.857142857142857, + "grad_norm": 0.43990400419076664, + "learning_rate": 3.1694850350970686e-06, + "loss": 0.1373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14388282597064972, + "step": 3690, + "valid_targets_mean": 5896.1, + "valid_targets_min": 1277 + }, + { + "epoch": 5.865079365079366, + "grad_norm": 0.4430961156309604, + "learning_rate": 3.1268570214896265e-06, + "loss": 0.1454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16903355717658997, + "step": 3695, + "valid_targets_mean": 5768.0, + "valid_targets_min": 309 + }, + { + "epoch": 5.8730158730158735, + "grad_norm": 0.4687413821050724, + "learning_rate": 3.0844932931629602e-06, + "loss": 0.1389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12664330005645752, + "step": 3700, + "valid_targets_mean": 5469.9, + "valid_targets_min": 582 + }, + { + "epoch": 5.880952380952381, + "grad_norm": 0.4251478406093825, + "learning_rate": 3.0423945136632626e-06, + "loss": 0.1371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14676138758659363, + "step": 3705, + "valid_targets_mean": 6292.8, + "valid_targets_min": 1166 + }, + { + "epoch": 5.888888888888889, + "grad_norm": 0.4418913815124218, + "learning_rate": 3.000561342386814e-06, + "loss": 0.1459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1596686840057373, + "step": 3710, + "valid_targets_mean": 5276.0, + "valid_targets_min": 656 + }, + { + "epoch": 5.896825396825397, + "grad_norm": 0.46832139188754646, + "learning_rate": 2.9589944345696596e-06, + "loss": 0.1346, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12101615965366364, + "step": 3715, + "valid_targets_mean": 4913.4, + "valid_targets_min": 343 + }, + { + "epoch": 5.904761904761905, + "grad_norm": 0.4657929056680499, + "learning_rate": 2.9176944412773322e-06, + "loss": 0.149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17338143289089203, + "step": 3720, + "valid_targets_mean": 5412.4, + "valid_targets_min": 554 + }, + { + "epoch": 5.912698412698413, + "grad_norm": 0.4449595143164407, + "learning_rate": 2.876662009394673e-06, + "loss": 0.1346, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15727762877941132, + "step": 3725, + "valid_targets_mean": 5328.9, + "valid_targets_min": 643 + }, + { + "epoch": 5.920634920634921, + "grad_norm": 0.39696814679103, + "learning_rate": 2.8358977816156796e-06, + "loss": 0.1426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11638738214969635, + "step": 3730, + "valid_targets_mean": 6867.2, + "valid_targets_min": 3676 + }, + { + "epoch": 5.928571428571429, + "grad_norm": 0.47195292364800606, + "learning_rate": 2.7954023964334485e-06, + "loss": 0.1349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1505923867225647, + "step": 3735, + "valid_targets_mean": 5757.2, + "valid_targets_min": 704 + }, + { + "epoch": 5.936507936507937, + "grad_norm": 0.4486759780229958, + "learning_rate": 2.7551764881301955e-06, + "loss": 0.1246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13487672805786133, + "step": 3740, + "valid_targets_mean": 5926.0, + "valid_targets_min": 1024 + }, + { + "epoch": 5.944444444444445, + "grad_norm": 0.45150456684299833, + "learning_rate": 2.715220686767268e-06, + "loss": 0.1325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13316667079925537, + "step": 3745, + "valid_targets_mean": 5609.4, + "valid_targets_min": 2528 + }, + { + "epoch": 5.9523809523809526, + "grad_norm": 0.4612924420161714, + "learning_rate": 2.6755356181753247e-06, + "loss": 0.1555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17791566252708435, + "step": 3750, + "valid_targets_mean": 5493.9, + "valid_targets_min": 600 + }, + { + "epoch": 5.9603174603174605, + "grad_norm": 0.4466679622369211, + "learning_rate": 2.6361219039445328e-06, + "loss": 0.1425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1417991816997528, + "step": 3755, + "valid_targets_mean": 5531.8, + "valid_targets_min": 3560 + }, + { + "epoch": 5.968253968253968, + "grad_norm": 0.46290951572004435, + "learning_rate": 2.5969801614147838e-06, + "loss": 0.1455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1562907099723816, + "step": 3760, + "valid_targets_mean": 5726.4, + "valid_targets_min": 325 + }, + { + "epoch": 5.976190476190476, + "grad_norm": 0.41130105503656605, + "learning_rate": 2.558111003666075e-06, + "loss": 0.1451, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13213077187538147, + "step": 3765, + "valid_targets_mean": 5965.7, + "valid_targets_min": 1546 + }, + { + "epoch": 5.984126984126984, + "grad_norm": 0.4305429547314029, + "learning_rate": 2.519515039508893e-06, + "loss": 0.1328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11605265736579895, + "step": 3770, + "valid_targets_mean": 5509.0, + "valid_targets_min": 290 + }, + { + "epoch": 5.992063492063492, + "grad_norm": 0.43615187162481, + "learning_rate": 2.481192873474667e-06, + "loss": 0.1365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13950824737548828, + "step": 3775, + "valid_targets_mean": 5780.9, + "valid_targets_min": 1479 + }, + { + "epoch": 6.0, + "grad_norm": 0.39743183112610925, + "learning_rate": 2.4431451058062928e-06, + "loss": 0.1351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1134757548570633, + "step": 3780, + "valid_targets_mean": 5882.9, + "valid_targets_min": 2457 + }, + { + "epoch": 6.007936507936508, + "grad_norm": 0.4320579605085731, + "learning_rate": 2.4053723324487677e-06, + "loss": 0.1308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12739768624305725, + "step": 3785, + "valid_targets_mean": 4568.0, + "valid_targets_min": 652 + }, + { + "epoch": 6.015873015873016, + "grad_norm": 0.45806291042546377, + "learning_rate": 2.3678751450398196e-06, + "loss": 0.1348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1493530422449112, + "step": 3790, + "valid_targets_mean": 5264.6, + "valid_targets_min": 901 + }, + { + "epoch": 6.023809523809524, + "grad_norm": 0.4440865317653884, + "learning_rate": 2.330654130900656e-06, + "loss": 0.1324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13022762537002563, + "step": 3795, + "valid_targets_mean": 4942.4, + "valid_targets_min": 2199 + }, + { + "epoch": 6.031746031746032, + "grad_norm": 0.4289001750038118, + "learning_rate": 2.2937098730267572e-06, + "loss": 0.134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11798284202814102, + "step": 3800, + "valid_targets_mean": 5962.4, + "valid_targets_min": 656 + }, + { + "epoch": 6.0396825396825395, + "grad_norm": 0.4804416865960379, + "learning_rate": 2.2570429500787604e-06, + "loss": 0.1359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13769641518592834, + "step": 3805, + "valid_targets_mean": 4876.4, + "valid_targets_min": 2078 + }, + { + "epoch": 6.0476190476190474, + "grad_norm": 0.4248137219988032, + "learning_rate": 2.2206539363733738e-06, + "loss": 0.1265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09792445600032806, + "step": 3810, + "valid_targets_mean": 6735.6, + "valid_targets_min": 2875 + }, + { + "epoch": 6.055555555555555, + "grad_norm": 0.4458277839797382, + "learning_rate": 2.1845434018744038e-06, + "loss": 0.1336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13032405078411102, + "step": 3815, + "valid_targets_mean": 5100.5, + "valid_targets_min": 1711 + }, + { + "epoch": 6.063492063492063, + "grad_norm": 0.46870997489479654, + "learning_rate": 2.148711912183803e-06, + "loss": 0.1349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1340586543083191, + "step": 3820, + "valid_targets_mean": 6257.2, + "valid_targets_min": 3713 + }, + { + "epoch": 6.071428571428571, + "grad_norm": 0.43220936389411174, + "learning_rate": 2.1131600285328458e-06, + "loss": 0.1372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13491111993789673, + "step": 3825, + "valid_targets_mean": 5652.9, + "valid_targets_min": 2291 + }, + { + "epoch": 6.079365079365079, + "grad_norm": 0.4938619349035986, + "learning_rate": 2.0778883077732903e-06, + "loss": 0.1221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12590594589710236, + "step": 3830, + "valid_targets_mean": 5837.1, + "valid_targets_min": 1277 + }, + { + "epoch": 6.087301587301587, + "grad_norm": 0.4461842204628299, + "learning_rate": 2.0428973023686983e-06, + "loss": 0.1301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12995757162570953, + "step": 3835, + "valid_targets_mean": 5839.1, + "valid_targets_min": 2495 + }, + { + "epoch": 6.095238095238095, + "grad_norm": 0.4514403709392358, + "learning_rate": 2.0081875603857726e-06, + "loss": 0.1271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11519353836774826, + "step": 3840, + "valid_targets_mean": 5285.4, + "valid_targets_min": 1198 + }, + { + "epoch": 6.103174603174603, + "grad_norm": 0.5872250143318944, + "learning_rate": 1.973759625485743e-06, + "loss": 0.1295, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12013471126556396, + "step": 3845, + "valid_targets_mean": 3976.3, + "valid_targets_min": 1509 + }, + { + "epoch": 6.111111111111111, + "grad_norm": 0.5053201570927205, + "learning_rate": 1.9396140369159e-06, + "loss": 0.1314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14312024414539337, + "step": 3850, + "valid_targets_mean": 5034.4, + "valid_targets_min": 712 + }, + { + "epoch": 6.119047619047619, + "grad_norm": 0.4254491047353564, + "learning_rate": 1.9057513295011087e-06, + "loss": 0.1249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11980589479207993, + "step": 3855, + "valid_targets_mean": 5230.5, + "valid_targets_min": 618 + }, + { + "epoch": 6.1269841269841265, + "grad_norm": 0.45350019220553667, + "learning_rate": 1.8721720336354487e-06, + "loss": 0.1306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1343933343887329, + "step": 3860, + "valid_targets_mean": 5310.0, + "valid_targets_min": 861 + }, + { + "epoch": 6.134920634920635, + "grad_norm": 0.44908680436264514, + "learning_rate": 1.8388766752739017e-06, + "loss": 0.1298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13214536011219025, + "step": 3865, + "valid_targets_mean": 4819.6, + "valid_targets_min": 360 + }, + { + "epoch": 6.142857142857143, + "grad_norm": 0.43728782320626974, + "learning_rate": 1.805865775924116e-06, + "loss": 0.137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12990088760852814, + "step": 3870, + "valid_targets_mean": 5668.4, + "valid_targets_min": 2900 + }, + { + "epoch": 6.150793650793651, + "grad_norm": 0.46855550237931853, + "learning_rate": 1.7731398526382416e-06, + "loss": 0.1278, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11653086543083191, + "step": 3875, + "valid_targets_mean": 5082.9, + "valid_targets_min": 278 + }, + { + "epoch": 6.158730158730159, + "grad_norm": 0.4633118493365201, + "learning_rate": 1.7406994180048231e-06, + "loss": 0.1302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1214178055524826, + "step": 3880, + "valid_targets_mean": 5079.4, + "valid_targets_min": 596 + }, + { + "epoch": 6.166666666666667, + "grad_norm": 0.4324845887042187, + "learning_rate": 1.7085449801407783e-06, + "loss": 0.1242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11891638487577438, + "step": 3885, + "valid_targets_mean": 5482.2, + "valid_targets_min": 630 + }, + { + "epoch": 6.174603174603175, + "grad_norm": 0.4593597812117515, + "learning_rate": 1.67667704268343e-06, + "loss": 0.1302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12844383716583252, + "step": 3890, + "valid_targets_mean": 5304.3, + "valid_targets_min": 968 + }, + { + "epoch": 6.182539682539683, + "grad_norm": 1.0409294151451973, + "learning_rate": 1.6450961047826353e-06, + "loss": 0.127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14141665399074554, + "step": 3895, + "valid_targets_mean": 4083.4, + "valid_targets_min": 287 + }, + { + "epoch": 6.190476190476191, + "grad_norm": 0.4147176475336051, + "learning_rate": 1.6138026610929446e-06, + "loss": 0.1377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12204622477293015, + "step": 3900, + "valid_targets_mean": 6026.9, + "valid_targets_min": 1789 + }, + { + "epoch": 6.198412698412699, + "grad_norm": 0.7688468165385852, + "learning_rate": 1.5827972017658732e-06, + "loss": 0.1434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14009518921375275, + "step": 3905, + "valid_targets_mean": 5232.4, + "valid_targets_min": 609 + }, + { + "epoch": 6.2063492063492065, + "grad_norm": 0.5742634408323547, + "learning_rate": 1.5520802124422108e-06, + "loss": 0.1284, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11271405220031738, + "step": 3910, + "valid_targets_mean": 5412.1, + "valid_targets_min": 677 + }, + { + "epoch": 6.214285714285714, + "grad_norm": 0.4434637011482024, + "learning_rate": 1.5216521742444236e-06, + "loss": 0.1266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15959657728672028, + "step": 3915, + "valid_targets_mean": 5545.5, + "valid_targets_min": 366 + }, + { + "epoch": 6.222222222222222, + "grad_norm": 0.4473257762920866, + "learning_rate": 1.491513563769118e-06, + "loss": 0.1277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13596297800540924, + "step": 3920, + "valid_targets_mean": 6079.2, + "valid_targets_min": 1352 + }, + { + "epoch": 6.23015873015873, + "grad_norm": 0.40218956890406166, + "learning_rate": 1.4616648530795673e-06, + "loss": 0.1367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13123834133148193, + "step": 3925, + "valid_targets_mean": 6588.4, + "valid_targets_min": 1264 + }, + { + "epoch": 6.238095238095238, + "grad_norm": 0.4665917089898694, + "learning_rate": 1.432106509698319e-06, + "loss": 0.1403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14458967745304108, + "step": 3930, + "valid_targets_mean": 5620.0, + "valid_targets_min": 2860 + }, + { + "epoch": 6.246031746031746, + "grad_norm": 0.4804278259544632, + "learning_rate": 1.4028389965998867e-06, + "loss": 0.1322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13646945357322693, + "step": 3935, + "valid_targets_mean": 4918.8, + "valid_targets_min": 2109 + }, + { + "epoch": 6.253968253968254, + "grad_norm": 0.4573452936493578, + "learning_rate": 1.3738627722034848e-06, + "loss": 0.1378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12479956448078156, + "step": 3940, + "valid_targets_mean": 5664.0, + "valid_targets_min": 890 + }, + { + "epoch": 6.261904761904762, + "grad_norm": 0.4112194821979111, + "learning_rate": 1.345178290365845e-06, + "loss": 0.1303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12601318955421448, + "step": 3945, + "valid_targets_mean": 5897.8, + "valid_targets_min": 2441 + }, + { + "epoch": 6.26984126984127, + "grad_norm": 0.43770788181094994, + "learning_rate": 1.3167860003741218e-06, + "loss": 0.1333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13687068223953247, + "step": 3950, + "valid_targets_mean": 5294.8, + "valid_targets_min": 872 + }, + { + "epoch": 6.277777777777778, + "grad_norm": 0.43651984734406524, + "learning_rate": 1.2886863469388389e-06, + "loss": 0.1357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13095057010650635, + "step": 3955, + "valid_targets_mean": 5041.6, + "valid_targets_min": 946 + }, + { + "epoch": 6.285714285714286, + "grad_norm": 0.4407006248372022, + "learning_rate": 1.2608797701869425e-06, + "loss": 0.1454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13327287137508392, + "step": 3960, + "valid_targets_mean": 5152.8, + "valid_targets_min": 968 + }, + { + "epoch": 6.2936507936507935, + "grad_norm": 0.4830750093647113, + "learning_rate": 1.2333667056548881e-06, + "loss": 0.1391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12351542711257935, + "step": 3965, + "valid_targets_mean": 4494.3, + "valid_targets_min": 299 + }, + { + "epoch": 6.301587301587301, + "grad_norm": 0.4721717415354756, + "learning_rate": 1.2061475842818337e-06, + "loss": 0.1466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15659388899803162, + "step": 3970, + "valid_targets_mean": 5026.2, + "valid_targets_min": 1775 + }, + { + "epoch": 6.309523809523809, + "grad_norm": 0.42311493337432965, + "learning_rate": 1.1792228324028776e-06, + "loss": 0.1289, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15567630529403687, + "step": 3975, + "valid_targets_mean": 5791.2, + "valid_targets_min": 3515 + }, + { + "epoch": 6.317460317460317, + "grad_norm": 1.4585742478929205, + "learning_rate": 1.152592871742395e-06, + "loss": 0.133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13825181126594543, + "step": 3980, + "valid_targets_mean": 5244.9, + "valid_targets_min": 558 + }, + { + "epoch": 6.325396825396825, + "grad_norm": 0.461117951811656, + "learning_rate": 1.1262581194074152e-06, + "loss": 0.131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11796639859676361, + "step": 3985, + "valid_targets_mean": 5554.2, + "valid_targets_min": 2245 + }, + { + "epoch": 6.333333333333333, + "grad_norm": 0.47633924217416224, + "learning_rate": 1.100218987881112e-06, + "loss": 0.1325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12955845892429352, + "step": 3990, + "valid_targets_mean": 6029.9, + "valid_targets_min": 2670 + }, + { + "epoch": 6.341269841269841, + "grad_norm": 0.45154703338066743, + "learning_rate": 1.0744758850163085e-06, + "loss": 0.1408, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13291522860527039, + "step": 3995, + "valid_targets_mean": 5422.9, + "valid_targets_min": 539 + }, + { + "epoch": 6.349206349206349, + "grad_norm": 0.5119900863580499, + "learning_rate": 1.0490292140291247e-06, + "loss": 0.1382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13096119463443756, + "step": 4000, + "valid_targets_mean": 5480.8, + "valid_targets_min": 291 + }, + { + "epoch": 6.357142857142857, + "grad_norm": 0.5126206204332201, + "learning_rate": 1.0238793734926467e-06, + "loss": 0.13, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14911425113677979, + "step": 4005, + "valid_targets_mean": 4797.0, + "valid_targets_min": 1266 + }, + { + "epoch": 6.365079365079365, + "grad_norm": 0.4880357246785836, + "learning_rate": 9.990267573306745e-07, + "loss": 0.1423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1387421041727066, + "step": 4010, + "valid_targets_mean": 4544.9, + "valid_targets_min": 690 + }, + { + "epoch": 6.3730158730158735, + "grad_norm": 0.40558138018757806, + "learning_rate": 9.744717548115613e-07, + "loss": 0.1278, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12570391595363617, + "step": 4015, + "valid_targets_mean": 6268.6, + "valid_targets_min": 3291 + }, + { + "epoch": 6.380952380952381, + "grad_norm": 0.41810620070165866, + "learning_rate": 9.502147505421244e-07, + "loss": 0.1351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1270408034324646, + "step": 4020, + "valid_targets_mean": 6078.6, + "valid_targets_min": 797 + }, + { + "epoch": 6.388888888888889, + "grad_norm": 0.4496644146071595, + "learning_rate": 9.262561244616108e-07, + "loss": 0.133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14739742875099182, + "step": 4025, + "valid_targets_mean": 5835.8, + "valid_targets_min": 783 + }, + { + "epoch": 6.396825396825397, + "grad_norm": 0.45373871441149133, + "learning_rate": 9.025962518357323e-07, + "loss": 0.1241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11742935329675674, + "step": 4030, + "valid_targets_mean": 4566.4, + "valid_targets_min": 290 + }, + { + "epoch": 6.404761904761905, + "grad_norm": 0.4617395683182264, + "learning_rate": 8.792355032508282e-07, + "loss": 0.1362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12306074798107147, + "step": 4035, + "valid_targets_mean": 5038.2, + "valid_targets_min": 626 + }, + { + "epoch": 6.412698412698413, + "grad_norm": 0.4644079416505595, + "learning_rate": 8.561742446080168e-07, + "loss": 0.1282, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1280827522277832, + "step": 4040, + "valid_targets_mean": 5057.6, + "valid_targets_min": 663 + }, + { + "epoch": 6.420634920634921, + "grad_norm": 0.425213082782362, + "learning_rate": 8.334128371174955e-07, + "loss": 0.1418, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14657798409461975, + "step": 4045, + "valid_targets_mean": 6001.6, + "valid_targets_min": 582 + }, + { + "epoch": 6.428571428571429, + "grad_norm": 0.428425934530618, + "learning_rate": 8.109516372928605e-07, + "loss": 0.1309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13139371573925018, + "step": 4050, + "valid_targets_mean": 5651.9, + "valid_targets_min": 2298 + }, + { + "epoch": 6.436507936507937, + "grad_norm": 0.4405821827658464, + "learning_rate": 7.887909969455366e-07, + "loss": 0.1326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12469398230314255, + "step": 4055, + "valid_targets_mean": 5376.2, + "valid_targets_min": 267 + }, + { + "epoch": 6.444444444444445, + "grad_norm": 0.5057128403035726, + "learning_rate": 7.669312631792758e-07, + "loss": 0.14, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14254549145698547, + "step": 4060, + "valid_targets_mean": 5674.9, + "valid_targets_min": 2241 + }, + { + "epoch": 6.4523809523809526, + "grad_norm": 0.615770615958985, + "learning_rate": 7.453727783846876e-07, + "loss": 0.1439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1313585788011551, + "step": 4065, + "valid_targets_mean": 5325.8, + "valid_targets_min": 1267 + }, + { + "epoch": 6.4603174603174605, + "grad_norm": 0.4640004582080486, + "learning_rate": 7.241158802339065e-07, + "loss": 0.1406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14738494157791138, + "step": 4070, + "valid_targets_mean": 5742.9, + "valid_targets_min": 297 + }, + { + "epoch": 6.468253968253968, + "grad_norm": 0.44707812759358856, + "learning_rate": 7.031609016753016e-07, + "loss": 0.1372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1283096969127655, + "step": 4075, + "valid_targets_mean": 5941.6, + "valid_targets_min": 2715 + }, + { + "epoch": 6.476190476190476, + "grad_norm": 0.5025741831025621, + "learning_rate": 6.825081709282377e-07, + "loss": 0.1384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14924189448356628, + "step": 4080, + "valid_targets_mean": 5481.9, + "valid_targets_min": 673 + }, + { + "epoch": 6.484126984126984, + "grad_norm": 0.4928322290987037, + "learning_rate": 6.62158011477958e-07, + "loss": 0.1391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14254453778266907, + "step": 4085, + "valid_targets_mean": 5396.4, + "valid_targets_min": 1013 + }, + { + "epoch": 6.492063492063492, + "grad_norm": 0.4463090392869717, + "learning_rate": 6.421107420705097e-07, + "loss": 0.139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1291046142578125, + "step": 4090, + "valid_targets_mean": 5304.8, + "valid_targets_min": 1268 + }, + { + "epoch": 6.5, + "grad_norm": 0.41940627052314144, + "learning_rate": 6.223666767077508e-07, + "loss": 0.1369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1368085891008377, + "step": 4095, + "valid_targets_mean": 5791.9, + "valid_targets_min": 824 + }, + { + "epoch": 6.507936507936508, + "grad_norm": 0.38633976220944566, + "learning_rate": 6.029261246424267e-07, + "loss": 0.1254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11674937605857849, + "step": 4100, + "valid_targets_mean": 7173.1, + "valid_targets_min": 3321 + }, + { + "epoch": 6.515873015873016, + "grad_norm": 0.5710755866104514, + "learning_rate": 5.837893903733394e-07, + "loss": 0.1398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14703723788261414, + "step": 4105, + "valid_targets_mean": 4992.6, + "valid_targets_min": 251 + }, + { + "epoch": 6.523809523809524, + "grad_norm": 0.42790485901259206, + "learning_rate": 5.649567736405681e-07, + "loss": 0.1345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11689166724681854, + "step": 4110, + "valid_targets_mean": 5996.3, + "valid_targets_min": 3073 + }, + { + "epoch": 6.531746031746032, + "grad_norm": 0.3978542753238711, + "learning_rate": 5.464285694207672e-07, + "loss": 0.1299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12328889966011047, + "step": 4115, + "valid_targets_mean": 6366.9, + "valid_targets_min": 2921 + }, + { + "epoch": 6.5396825396825395, + "grad_norm": 0.4803261314786915, + "learning_rate": 5.282050679225714e-07, + "loss": 0.1332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13136357069015503, + "step": 4120, + "valid_targets_mean": 5456.8, + "valid_targets_min": 1746 + }, + { + "epoch": 6.5476190476190474, + "grad_norm": 0.3906973340150284, + "learning_rate": 5.102865545820245e-07, + "loss": 0.13, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12256291508674622, + "step": 4125, + "valid_targets_mean": 6359.6, + "valid_targets_min": 2305 + }, + { + "epoch": 6.555555555555555, + "grad_norm": 0.4059594097865994, + "learning_rate": 4.926733100581182e-07, + "loss": 0.1378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11908716708421707, + "step": 4130, + "valid_targets_mean": 6355.2, + "valid_targets_min": 325 + }, + { + "epoch": 6.563492063492063, + "grad_norm": 0.5832130007483879, + "learning_rate": 4.7536561022840213e-07, + "loss": 0.1346, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13830284774303436, + "step": 4135, + "valid_targets_mean": 4896.4, + "valid_targets_min": 1408 + }, + { + "epoch": 6.571428571428571, + "grad_norm": 0.4059800621113446, + "learning_rate": 4.5836372618464964e-07, + "loss": 0.1336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12423016875982285, + "step": 4140, + "valid_targets_mean": 6594.7, + "valid_targets_min": 3811 + }, + { + "epoch": 6.579365079365079, + "grad_norm": 0.4185374589094962, + "learning_rate": 4.416679242286215e-07, + "loss": 0.1279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13419991731643677, + "step": 4145, + "valid_targets_mean": 6087.1, + "valid_targets_min": 2433 + }, + { + "epoch": 6.587301587301587, + "grad_norm": 0.5103870835544566, + "learning_rate": 4.2527846586789547e-07, + "loss": 0.136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14000031352043152, + "step": 4150, + "valid_targets_mean": 4625.3, + "valid_targets_min": 221 + }, + { + "epoch": 6.595238095238095, + "grad_norm": 0.5765993905041488, + "learning_rate": 4.0919560781176317e-07, + "loss": 0.1312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13032563030719757, + "step": 4155, + "valid_targets_mean": 5195.4, + "valid_targets_min": 1443 + }, + { + "epoch": 6.603174603174603, + "grad_norm": 0.48593436180980204, + "learning_rate": 3.934196019672176e-07, + "loss": 0.1267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13097088038921356, + "step": 4160, + "valid_targets_mean": 4735.8, + "valid_targets_min": 1686 + }, + { + "epoch": 6.611111111111111, + "grad_norm": 0.4395609496182445, + "learning_rate": 3.779506954349965e-07, + "loss": 0.1357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12398695945739746, + "step": 4165, + "valid_targets_mean": 6392.1, + "valid_targets_min": 2819 + }, + { + "epoch": 6.619047619047619, + "grad_norm": 0.47600560032316025, + "learning_rate": 3.6278913050572076e-07, + "loss": 0.1377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1410660743713379, + "step": 4170, + "valid_targets_mean": 4714.9, + "valid_targets_min": 921 + }, + { + "epoch": 6.6269841269841265, + "grad_norm": 0.4626328635010138, + "learning_rate": 3.4793514465610414e-07, + "loss": 0.1227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12245401740074158, + "step": 4175, + "valid_targets_mean": 5662.9, + "valid_targets_min": 1293 + }, + { + "epoch": 6.634920634920634, + "grad_norm": 0.4856477089680751, + "learning_rate": 3.3338897054521205e-07, + "loss": 0.1445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12981770932674408, + "step": 4180, + "valid_targets_mean": 6161.9, + "valid_targets_min": 1672 + }, + { + "epoch": 6.642857142857143, + "grad_norm": 0.4382225877662096, + "learning_rate": 3.191508360108464e-07, + "loss": 0.1265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12940582633018494, + "step": 4185, + "valid_targets_mean": 5541.3, + "valid_targets_min": 525 + }, + { + "epoch": 6.650793650793651, + "grad_norm": 0.4467797690671947, + "learning_rate": 3.0522096406595536e-07, + "loss": 0.1249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12461797893047333, + "step": 4190, + "valid_targets_mean": 5350.9, + "valid_targets_min": 1531 + }, + { + "epoch": 6.658730158730159, + "grad_norm": 0.4315640371338101, + "learning_rate": 2.9159957289514926e-07, + "loss": 0.1307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1185673251748085, + "step": 4195, + "valid_targets_mean": 5560.6, + "valid_targets_min": 2700 + }, + { + "epoch": 6.666666666666667, + "grad_norm": 0.48404703695546003, + "learning_rate": 2.782868758512791e-07, + "loss": 0.1374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13700520992279053, + "step": 4200, + "valid_targets_mean": 5088.4, + "valid_targets_min": 708 + }, + { + "epoch": 6.674603174603175, + "grad_norm": 0.4561326307439442, + "learning_rate": 2.6528308145210125e-07, + "loss": 0.1336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14443397521972656, + "step": 4205, + "valid_targets_mean": 5388.9, + "valid_targets_min": 987 + }, + { + "epoch": 6.682539682539683, + "grad_norm": 0.4432184683642521, + "learning_rate": 2.525883933770046e-07, + "loss": 0.1488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13129255175590515, + "step": 4210, + "valid_targets_mean": 6560.8, + "valid_targets_min": 2872 + }, + { + "epoch": 6.690476190476191, + "grad_norm": 0.4428498025907347, + "learning_rate": 2.402030104638198e-07, + "loss": 0.1281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1193118616938591, + "step": 4215, + "valid_targets_mean": 5178.1, + "valid_targets_min": 1066 + }, + { + "epoch": 6.698412698412699, + "grad_norm": 0.4342898053436231, + "learning_rate": 2.2812712670571502e-07, + "loss": 0.1307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13040482997894287, + "step": 4220, + "valid_targets_mean": 5622.1, + "valid_targets_min": 2496 + }, + { + "epoch": 6.7063492063492065, + "grad_norm": 2.0764732196910196, + "learning_rate": 2.1636093124814738e-07, + "loss": 0.136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12159843742847443, + "step": 4225, + "valid_targets_mean": 5518.6, + "valid_targets_min": 1795 + }, + { + "epoch": 6.714285714285714, + "grad_norm": 0.4586433877189939, + "learning_rate": 2.0490460838589855e-07, + "loss": 0.1337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12682956457138062, + "step": 4230, + "valid_targets_mean": 4948.1, + "valid_targets_min": 1032 + }, + { + "epoch": 6.722222222222222, + "grad_norm": 0.47132990810816733, + "learning_rate": 1.9375833756019923e-07, + "loss": 0.1348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15487657487392426, + "step": 4235, + "valid_targets_mean": 4774.9, + "valid_targets_min": 1168 + }, + { + "epoch": 6.73015873015873, + "grad_norm": 0.4124840094163903, + "learning_rate": 1.8292229335590716e-07, + "loss": 0.1351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11652678996324539, + "step": 4240, + "valid_targets_mean": 6393.1, + "valid_targets_min": 3197 + }, + { + "epoch": 6.738095238095238, + "grad_norm": 0.4350958770760526, + "learning_rate": 1.7239664549878688e-07, + "loss": 0.1297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1278618425130844, + "step": 4245, + "valid_targets_mean": 5711.4, + "valid_targets_min": 307 + }, + { + "epoch": 6.746031746031746, + "grad_norm": 0.4380389060922931, + "learning_rate": 1.6218155885283192e-07, + "loss": 0.1333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1238991990685463, + "step": 4250, + "valid_targets_mean": 5243.3, + "valid_targets_min": 1150 + }, + { + "epoch": 6.753968253968254, + "grad_norm": 0.45207585431063574, + "learning_rate": 1.5227719341769364e-07, + "loss": 0.1406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12772828340530396, + "step": 4255, + "valid_targets_mean": 6135.6, + "valid_targets_min": 2455 + }, + { + "epoch": 6.761904761904762, + "grad_norm": 0.48236608264345426, + "learning_rate": 1.4268370432618306e-07, + "loss": 0.1464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14505012333393097, + "step": 4260, + "valid_targets_mean": 4687.0, + "valid_targets_min": 764 + }, + { + "epoch": 6.76984126984127, + "grad_norm": 0.4233867675421516, + "learning_rate": 1.3340124184182178e-07, + "loss": 0.1289, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12908919155597687, + "step": 4265, + "valid_targets_mean": 6025.6, + "valid_targets_min": 2257 + }, + { + "epoch": 6.777777777777778, + "grad_norm": 0.4877416792035469, + "learning_rate": 1.2442995135650393e-07, + "loss": 0.1296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15760302543640137, + "step": 4270, + "valid_targets_mean": 4828.7, + "valid_targets_min": 268 + }, + { + "epoch": 6.785714285714286, + "grad_norm": 0.6506490177567913, + "learning_rate": 1.1576997338821339e-07, + "loss": 0.1364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13028597831726074, + "step": 4275, + "valid_targets_mean": 5155.9, + "valid_targets_min": 845 + }, + { + "epoch": 6.7936507936507935, + "grad_norm": 0.4477970591498348, + "learning_rate": 1.0742144357882567e-07, + "loss": 0.1401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1585049331188202, + "step": 4280, + "valid_targets_mean": 5534.2, + "valid_targets_min": 310 + }, + { + "epoch": 6.801587301587301, + "grad_norm": 0.49507868999640176, + "learning_rate": 9.938449269197181e-08, + "loss": 0.1258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1410304307937622, + "step": 4285, + "valid_targets_mean": 4884.3, + "valid_targets_min": 447 + }, + { + "epoch": 6.809523809523809, + "grad_norm": 0.4842993936056907, + "learning_rate": 9.165924661100889e-08, + "loss": 0.1347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12479911744594574, + "step": 4290, + "valid_targets_mean": 5132.9, + "valid_targets_min": 397 + }, + { + "epoch": 6.817460317460317, + "grad_norm": 0.43326494078340105, + "learning_rate": 8.424582633703493e-08, + "loss": 0.1291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13402575254440308, + "step": 4295, + "valid_targets_mean": 5267.1, + "valid_targets_min": 1121 + }, + { + "epoch": 6.825396825396825, + "grad_norm": 0.39800154553874223, + "learning_rate": 7.714434798699933e-08, + "loss": 0.1228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12014246731996536, + "step": 4300, + "valid_targets_mean": 6144.5, + "valid_targets_min": 2815 + }, + { + "epoch": 6.833333333333333, + "grad_norm": 0.42208852936776803, + "learning_rate": 7.035492279187538e-08, + "loss": 0.1331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12092911452054977, + "step": 4305, + "valid_targets_mean": 5618.4, + "valid_targets_min": 855 + }, + { + "epoch": 6.841269841269841, + "grad_norm": 0.5061623759467122, + "learning_rate": 6.387765709493288e-08, + "loss": 0.1362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13144025206565857, + "step": 4310, + "valid_targets_mean": 5299.0, + "valid_targets_min": 737 + }, + { + "epoch": 6.849206349206349, + "grad_norm": 0.4703474965866674, + "learning_rate": 5.7712652350061515e-08, + "loss": 0.1368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14938583970069885, + "step": 4315, + "valid_targets_mean": 4459.2, + "valid_targets_min": 229 + }, + { + "epoch": 6.857142857142857, + "grad_norm": 0.4544153706399285, + "learning_rate": 5.186000512018341e-08, + "loss": 0.1304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13210490345954895, + "step": 4320, + "valid_targets_mean": 5968.1, + "valid_targets_min": 1966 + }, + { + "epoch": 6.865079365079366, + "grad_norm": 0.3951693224390717, + "learning_rate": 4.631980707574535e-08, + "loss": 0.1222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10764377564191818, + "step": 4325, + "valid_targets_mean": 6102.2, + "valid_targets_min": 281 + }, + { + "epoch": 6.8730158730158735, + "grad_norm": 0.47764153403255666, + "learning_rate": 4.10921449932733e-08, + "loss": 0.1415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13427546620368958, + "step": 4330, + "valid_targets_mean": 4686.6, + "valid_targets_min": 1301 + }, + { + "epoch": 6.880952380952381, + "grad_norm": 0.4686718928282114, + "learning_rate": 3.61771007540268e-08, + "loss": 0.1366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12770965695381165, + "step": 4335, + "valid_targets_mean": 5458.8, + "valid_targets_min": 632 + }, + { + "epoch": 6.888888888888889, + "grad_norm": 0.45215811836727576, + "learning_rate": 3.157475134270227e-08, + "loss": 0.1306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14995113015174866, + "step": 4340, + "valid_targets_mean": 5664.1, + "valid_targets_min": 3600 + }, + { + "epoch": 6.896825396825397, + "grad_norm": 0.49008507070012214, + "learning_rate": 2.728516884624277e-08, + "loss": 0.1288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13054564595222473, + "step": 4345, + "valid_targets_mean": 5089.6, + "valid_targets_min": 637 + }, + { + "epoch": 6.904761904761905, + "grad_norm": 0.48404319287292985, + "learning_rate": 2.3308420452690106e-08, + "loss": 0.1448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13617941737174988, + "step": 4350, + "valid_targets_mean": 4722.2, + "valid_targets_min": 1597 + }, + { + "epoch": 6.912698412698413, + "grad_norm": 0.40638533094484175, + "learning_rate": 1.9644568450147837e-08, + "loss": 0.1317, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14109522104263306, + "step": 4355, + "valid_targets_mean": 6445.0, + "valid_targets_min": 3553 + }, + { + "epoch": 6.920634920634921, + "grad_norm": 0.4629617085369057, + "learning_rate": 1.6293670225799864e-08, + "loss": 0.1253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1260548233985901, + "step": 4360, + "valid_targets_mean": 5373.1, + "valid_targets_min": 543 + }, + { + "epoch": 6.928571428571429, + "grad_norm": 0.45402888146937276, + "learning_rate": 1.3255778265013342e-08, + "loss": 0.1365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14494284987449646, + "step": 4365, + "valid_targets_mean": 5725.3, + "valid_targets_min": 1550 + }, + { + "epoch": 6.936507936507937, + "grad_norm": 0.4086709277572136, + "learning_rate": 1.0530940150512703e-08, + "loss": 0.1352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1179996132850647, + "step": 4370, + "valid_targets_mean": 5990.4, + "valid_targets_min": 361 + }, + { + "epoch": 6.944444444444445, + "grad_norm": 0.42662410297325026, + "learning_rate": 8.119198561638009e-09, + "loss": 0.1466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14371398091316223, + "step": 4375, + "valid_targets_mean": 5997.8, + "valid_targets_min": 3437 + }, + { + "epoch": 6.9523809523809526, + "grad_norm": 0.42639557312324744, + "learning_rate": 6.020591273674381e-09, + "loss": 0.1317, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1339547336101532, + "step": 4380, + "valid_targets_mean": 6416.2, + "valid_targets_min": 609 + }, + { + "epoch": 6.9603174603174605, + "grad_norm": 0.4092765044556627, + "learning_rate": 4.2351511572635835e-09, + "loss": 0.1359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10867651551961899, + "step": 4385, + "valid_targets_mean": 6632.2, + "valid_targets_min": 1370 + }, + { + "epoch": 6.968253968253968, + "grad_norm": 0.4951156195990528, + "learning_rate": 2.7629061778866597e-09, + "loss": 0.1432, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1670551598072052, + "step": 4390, + "valid_targets_mean": 5063.4, + "valid_targets_min": 949 + }, + { + "epoch": 6.976190476190476, + "grad_norm": 0.5279789366650457, + "learning_rate": 1.603879395422059e-09, + "loss": 0.134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14463752508163452, + "step": 4395, + "valid_targets_mean": 4726.2, + "valid_targets_min": 638 + }, + { + "epoch": 6.984126984126984, + "grad_norm": 0.43870727411718197, + "learning_rate": 7.580889637925914e-10, + "loss": 0.1397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14193710684776306, + "step": 4400, + "valid_targets_mean": 5203.9, + "valid_targets_min": 767 + }, + { + "epoch": 6.992063492063492, + "grad_norm": 0.4604903629839532, + "learning_rate": 2.2554813067676705e-10, + "loss": 0.1268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1437646746635437, + "step": 4405, + "valid_targets_mean": 5522.9, + "valid_targets_min": 809 + }, + { + "epoch": 7.0, + "grad_norm": 0.4923560486700678, + "learning_rate": 6.265237300073778e-12, + "loss": 0.1497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15846839547157288, + "step": 4410, + "valid_targets_mean": 5295.1, + "valid_targets_min": 1258 + }, + { + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15846839547157288, + "step": 4410, + "total_flos": 1765744004825088.0, + "train_loss": 0.17469855595608147, + "train_runtime": 27741.8701, + "train_samples_per_second": 2.541, + "train_steps_per_second": 0.159, + "valid_targets_mean": 5295.1, + "valid_targets_min": 1258 + } + ], + "logging_steps": 5, + "max_steps": 4410, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 1500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1765744004825088.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}