| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 7.0, | |
| "eval_steps": 500, | |
| "global_step": 2814, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.012437810945273632, | |
| "grad_norm": 13.380951406456147, | |
| "learning_rate": 5.673758865248227e-07, | |
| "loss": 0.7822, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.7657574415206909, | |
| "step": 5, | |
| "valid_targets_mean": 4010.9, | |
| "valid_targets_min": 1767 | |
| }, | |
| { | |
| "epoch": 0.024875621890547265, | |
| "grad_norm": 12.545654526351226, | |
| "learning_rate": 1.276595744680851e-06, | |
| "loss": 0.8056, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.8114880919456482, | |
| "step": 10, | |
| "valid_targets_mean": 3892.8, | |
| "valid_targets_min": 2200 | |
| }, | |
| { | |
| "epoch": 0.03731343283582089, | |
| "grad_norm": 9.667916880642556, | |
| "learning_rate": 1.9858156028368797e-06, | |
| "loss": 0.7418, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6924527287483215, | |
| "step": 15, | |
| "valid_targets_mean": 4415.2, | |
| "valid_targets_min": 1889 | |
| }, | |
| { | |
| "epoch": 0.04975124378109453, | |
| "grad_norm": 6.112913024339798, | |
| "learning_rate": 2.695035460992908e-06, | |
| "loss": 0.6973, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.668594241142273, | |
| "step": 20, | |
| "valid_targets_mean": 3674.9, | |
| "valid_targets_min": 191 | |
| }, | |
| { | |
| "epoch": 0.06218905472636816, | |
| "grad_norm": 4.866305911621686, | |
| "learning_rate": 3.4042553191489363e-06, | |
| "loss": 0.6624, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.644311785697937, | |
| "step": 25, | |
| "valid_targets_mean": 3697.1, | |
| "valid_targets_min": 355 | |
| }, | |
| { | |
| "epoch": 0.07462686567164178, | |
| "grad_norm": 4.31903460986635, | |
| "learning_rate": 4.113475177304965e-06, | |
| "loss": 0.6025, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.569460391998291, | |
| "step": 30, | |
| "valid_targets_mean": 4055.9, | |
| "valid_targets_min": 1675 | |
| }, | |
| { | |
| "epoch": 0.08706467661691543, | |
| "grad_norm": 1.8781712293982455, | |
| "learning_rate": 4.822695035460993e-06, | |
| "loss": 0.5493, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5322237014770508, | |
| "step": 35, | |
| "valid_targets_mean": 3793.4, | |
| "valid_targets_min": 1413 | |
| }, | |
| { | |
| "epoch": 0.09950248756218906, | |
| "grad_norm": 1.3648445070279718, | |
| "learning_rate": 5.531914893617022e-06, | |
| "loss": 0.5353, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5094773769378662, | |
| "step": 40, | |
| "valid_targets_mean": 3781.3, | |
| "valid_targets_min": 1421 | |
| }, | |
| { | |
| "epoch": 0.11194029850746269, | |
| "grad_norm": 0.9598535483295971, | |
| "learning_rate": 6.24113475177305e-06, | |
| "loss": 0.4963, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4716443717479706, | |
| "step": 45, | |
| "valid_targets_mean": 4314.4, | |
| "valid_targets_min": 1362 | |
| }, | |
| { | |
| "epoch": 0.12437810945273632, | |
| "grad_norm": 0.8095195355924959, | |
| "learning_rate": 6.950354609929079e-06, | |
| "loss": 0.4842, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4684237241744995, | |
| "step": 50, | |
| "valid_targets_mean": 4619.2, | |
| "valid_targets_min": 2128 | |
| }, | |
| { | |
| "epoch": 0.13681592039800994, | |
| "grad_norm": 0.9535042747879249, | |
| "learning_rate": 7.659574468085107e-06, | |
| "loss": 0.4799, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.502250611782074, | |
| "step": 55, | |
| "valid_targets_mean": 3554.8, | |
| "valid_targets_min": 2013 | |
| }, | |
| { | |
| "epoch": 0.14925373134328357, | |
| "grad_norm": 0.7738334376355982, | |
| "learning_rate": 8.368794326241135e-06, | |
| "loss": 0.4574, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.49030762910842896, | |
| "step": 60, | |
| "valid_targets_mean": 3948.2, | |
| "valid_targets_min": 2060 | |
| }, | |
| { | |
| "epoch": 0.16169154228855723, | |
| "grad_norm": 0.7133304690095129, | |
| "learning_rate": 9.078014184397164e-06, | |
| "loss": 0.4319, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41507917642593384, | |
| "step": 65, | |
| "valid_targets_mean": 4287.1, | |
| "valid_targets_min": 2085 | |
| }, | |
| { | |
| "epoch": 0.17412935323383086, | |
| "grad_norm": 0.6718539217454166, | |
| "learning_rate": 9.787234042553192e-06, | |
| "loss": 0.4215, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41251397132873535, | |
| "step": 70, | |
| "valid_targets_mean": 4147.2, | |
| "valid_targets_min": 2291 | |
| }, | |
| { | |
| "epoch": 0.1865671641791045, | |
| "grad_norm": 0.6888672437166001, | |
| "learning_rate": 1.049645390070922e-05, | |
| "loss": 0.422, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4331628978252411, | |
| "step": 75, | |
| "valid_targets_mean": 3984.5, | |
| "valid_targets_min": 1357 | |
| }, | |
| { | |
| "epoch": 0.19900497512437812, | |
| "grad_norm": 0.7612883410877886, | |
| "learning_rate": 1.120567375886525e-05, | |
| "loss": 0.3944, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3932691514492035, | |
| "step": 80, | |
| "valid_targets_mean": 3577.6, | |
| "valid_targets_min": 1628 | |
| }, | |
| { | |
| "epoch": 0.21144278606965175, | |
| "grad_norm": 0.6889250639530377, | |
| "learning_rate": 1.1914893617021277e-05, | |
| "loss": 0.3975, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4268104135990143, | |
| "step": 85, | |
| "valid_targets_mean": 4420.2, | |
| "valid_targets_min": 2703 | |
| }, | |
| { | |
| "epoch": 0.22388059701492538, | |
| "grad_norm": 0.7389874437127749, | |
| "learning_rate": 1.2624113475177307e-05, | |
| "loss": 0.3817, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38703107833862305, | |
| "step": 90, | |
| "valid_targets_mean": 3759.1, | |
| "valid_targets_min": 2311 | |
| }, | |
| { | |
| "epoch": 0.236318407960199, | |
| "grad_norm": 0.6632832871080014, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 0.3705, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3471001982688904, | |
| "step": 95, | |
| "valid_targets_mean": 4546.7, | |
| "valid_targets_min": 2457 | |
| }, | |
| { | |
| "epoch": 0.24875621890547264, | |
| "grad_norm": 0.6773454789521931, | |
| "learning_rate": 1.4042553191489363e-05, | |
| "loss": 0.3705, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37445783615112305, | |
| "step": 100, | |
| "valid_targets_mean": 3990.8, | |
| "valid_targets_min": 1875 | |
| }, | |
| { | |
| "epoch": 0.26119402985074625, | |
| "grad_norm": 0.5679953534246097, | |
| "learning_rate": 1.475177304964539e-05, | |
| "loss": 0.35, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3382793068885803, | |
| "step": 105, | |
| "valid_targets_mean": 5048.9, | |
| "valid_targets_min": 1979 | |
| }, | |
| { | |
| "epoch": 0.2736318407960199, | |
| "grad_norm": 0.6335472342807328, | |
| "learning_rate": 1.546099290780142e-05, | |
| "loss": 0.3527, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36811989545822144, | |
| "step": 110, | |
| "valid_targets_mean": 3544.6, | |
| "valid_targets_min": 2019 | |
| }, | |
| { | |
| "epoch": 0.2860696517412935, | |
| "grad_norm": 0.5873605193729509, | |
| "learning_rate": 1.6170212765957446e-05, | |
| "loss": 0.35, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3602096438407898, | |
| "step": 115, | |
| "valid_targets_mean": 4335.9, | |
| "valid_targets_min": 2097 | |
| }, | |
| { | |
| "epoch": 0.29850746268656714, | |
| "grad_norm": 0.6603180715772641, | |
| "learning_rate": 1.6879432624113476e-05, | |
| "loss": 0.3513, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36116451025009155, | |
| "step": 120, | |
| "valid_targets_mean": 3368.5, | |
| "valid_targets_min": 1322 | |
| }, | |
| { | |
| "epoch": 0.31094527363184077, | |
| "grad_norm": 0.6177290895347114, | |
| "learning_rate": 1.7588652482269506e-05, | |
| "loss": 0.3454, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35631102323532104, | |
| "step": 125, | |
| "valid_targets_mean": 4271.0, | |
| "valid_targets_min": 2572 | |
| }, | |
| { | |
| "epoch": 0.32338308457711445, | |
| "grad_norm": 0.5970279991046994, | |
| "learning_rate": 1.8297872340425533e-05, | |
| "loss": 0.3302, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3269408345222473, | |
| "step": 130, | |
| "valid_targets_mean": 4144.0, | |
| "valid_targets_min": 2433 | |
| }, | |
| { | |
| "epoch": 0.3358208955223881, | |
| "grad_norm": 0.6988436544880555, | |
| "learning_rate": 1.9007092198581563e-05, | |
| "loss": 0.3462, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3329390287399292, | |
| "step": 135, | |
| "valid_targets_mean": 3773.1, | |
| "valid_targets_min": 1880 | |
| }, | |
| { | |
| "epoch": 0.3482587064676617, | |
| "grad_norm": 0.5663119370039671, | |
| "learning_rate": 1.971631205673759e-05, | |
| "loss": 0.3185, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33959251642227173, | |
| "step": 140, | |
| "valid_targets_mean": 5226.4, | |
| "valid_targets_min": 1685 | |
| }, | |
| { | |
| "epoch": 0.36069651741293535, | |
| "grad_norm": 0.7382656927862868, | |
| "learning_rate": 2.0425531914893616e-05, | |
| "loss": 0.3288, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2998434901237488, | |
| "step": 145, | |
| "valid_targets_mean": 3405.1, | |
| "valid_targets_min": 1997 | |
| }, | |
| { | |
| "epoch": 0.373134328358209, | |
| "grad_norm": 0.6399598041440727, | |
| "learning_rate": 2.113475177304965e-05, | |
| "loss": 0.3285, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3149760365486145, | |
| "step": 150, | |
| "valid_targets_mean": 4147.7, | |
| "valid_targets_min": 1051 | |
| }, | |
| { | |
| "epoch": 0.3855721393034826, | |
| "grad_norm": 0.6190347784845001, | |
| "learning_rate": 2.1843971631205676e-05, | |
| "loss": 0.3218, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32194218039512634, | |
| "step": 155, | |
| "valid_targets_mean": 3982.0, | |
| "valid_targets_min": 1776 | |
| }, | |
| { | |
| "epoch": 0.39800995024875624, | |
| "grad_norm": 0.6629989034109828, | |
| "learning_rate": 2.2553191489361703e-05, | |
| "loss": 0.3105, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29769569635391235, | |
| "step": 160, | |
| "valid_targets_mean": 3653.1, | |
| "valid_targets_min": 2275 | |
| }, | |
| { | |
| "epoch": 0.41044776119402987, | |
| "grad_norm": 0.6705243113369573, | |
| "learning_rate": 2.326241134751773e-05, | |
| "loss": 0.3093, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31587380170822144, | |
| "step": 165, | |
| "valid_targets_mean": 4081.2, | |
| "valid_targets_min": 2034 | |
| }, | |
| { | |
| "epoch": 0.4228855721393035, | |
| "grad_norm": 0.9424068832053736, | |
| "learning_rate": 2.3971631205673763e-05, | |
| "loss": 0.3142, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3107619285583496, | |
| "step": 170, | |
| "valid_targets_mean": 3648.3, | |
| "valid_targets_min": 1597 | |
| }, | |
| { | |
| "epoch": 0.43532338308457713, | |
| "grad_norm": 0.6511358668082933, | |
| "learning_rate": 2.468085106382979e-05, | |
| "loss": 0.3245, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34112197160720825, | |
| "step": 175, | |
| "valid_targets_mean": 3819.4, | |
| "valid_targets_min": 1782 | |
| }, | |
| { | |
| "epoch": 0.44776119402985076, | |
| "grad_norm": 0.6476380044242603, | |
| "learning_rate": 2.5390070921985816e-05, | |
| "loss": 0.3162, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29842424392700195, | |
| "step": 180, | |
| "valid_targets_mean": 3789.5, | |
| "valid_targets_min": 1876 | |
| }, | |
| { | |
| "epoch": 0.4601990049751244, | |
| "grad_norm": 0.627368462104055, | |
| "learning_rate": 2.609929078014185e-05, | |
| "loss": 0.3129, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33267688751220703, | |
| "step": 185, | |
| "valid_targets_mean": 4517.8, | |
| "valid_targets_min": 228 | |
| }, | |
| { | |
| "epoch": 0.472636815920398, | |
| "grad_norm": 0.6466632344322205, | |
| "learning_rate": 2.6808510638297876e-05, | |
| "loss": 0.3025, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29927682876586914, | |
| "step": 190, | |
| "valid_targets_mean": 4185.9, | |
| "valid_targets_min": 1492 | |
| }, | |
| { | |
| "epoch": 0.48507462686567165, | |
| "grad_norm": 0.6662800469045165, | |
| "learning_rate": 2.7517730496453903e-05, | |
| "loss": 0.3151, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3514325022697449, | |
| "step": 195, | |
| "valid_targets_mean": 4175.0, | |
| "valid_targets_min": 443 | |
| }, | |
| { | |
| "epoch": 0.4975124378109453, | |
| "grad_norm": 0.6517689188685865, | |
| "learning_rate": 2.822695035460993e-05, | |
| "loss": 0.3084, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3168841302394867, | |
| "step": 200, | |
| "valid_targets_mean": 4177.4, | |
| "valid_targets_min": 853 | |
| }, | |
| { | |
| "epoch": 0.5099502487562189, | |
| "grad_norm": 0.6855031086226197, | |
| "learning_rate": 2.8936170212765963e-05, | |
| "loss": 0.297, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2854161858558655, | |
| "step": 205, | |
| "valid_targets_mean": 3589.5, | |
| "valid_targets_min": 1106 | |
| }, | |
| { | |
| "epoch": 0.5223880597014925, | |
| "grad_norm": 0.6720194490881026, | |
| "learning_rate": 2.964539007092199e-05, | |
| "loss": 0.2946, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2903139591217041, | |
| "step": 210, | |
| "valid_targets_mean": 3750.4, | |
| "valid_targets_min": 1685 | |
| }, | |
| { | |
| "epoch": 0.5348258706467661, | |
| "grad_norm": 0.6995838272080971, | |
| "learning_rate": 3.0354609929078016e-05, | |
| "loss": 0.3066, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29320502281188965, | |
| "step": 215, | |
| "valid_targets_mean": 3562.6, | |
| "valid_targets_min": 831 | |
| }, | |
| { | |
| "epoch": 0.5472636815920398, | |
| "grad_norm": 0.5930224509626889, | |
| "learning_rate": 3.1063829787234046e-05, | |
| "loss": 0.3155, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33783021569252014, | |
| "step": 220, | |
| "valid_targets_mean": 4651.5, | |
| "valid_targets_min": 2381 | |
| }, | |
| { | |
| "epoch": 0.5597014925373134, | |
| "grad_norm": 0.6173823876530793, | |
| "learning_rate": 3.1773049645390076e-05, | |
| "loss": 0.3011, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30158767104148865, | |
| "step": 225, | |
| "valid_targets_mean": 4075.6, | |
| "valid_targets_min": 2042 | |
| }, | |
| { | |
| "epoch": 0.572139303482587, | |
| "grad_norm": 0.7805956934434903, | |
| "learning_rate": 3.24822695035461e-05, | |
| "loss": 0.3008, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31823039054870605, | |
| "step": 230, | |
| "valid_targets_mean": 3906.0, | |
| "valid_targets_min": 1425 | |
| }, | |
| { | |
| "epoch": 0.5845771144278606, | |
| "grad_norm": 0.6419587125168111, | |
| "learning_rate": 3.319148936170213e-05, | |
| "loss": 0.302, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31326407194137573, | |
| "step": 235, | |
| "valid_targets_mean": 3665.2, | |
| "valid_targets_min": 1696 | |
| }, | |
| { | |
| "epoch": 0.5970149253731343, | |
| "grad_norm": 0.8679447185514305, | |
| "learning_rate": 3.390070921985816e-05, | |
| "loss": 0.3012, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29827624559402466, | |
| "step": 240, | |
| "valid_targets_mean": 3904.9, | |
| "valid_targets_min": 2058 | |
| }, | |
| { | |
| "epoch": 0.6094527363184079, | |
| "grad_norm": 0.6810499039895173, | |
| "learning_rate": 3.460992907801419e-05, | |
| "loss": 0.2988, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2755545377731323, | |
| "step": 245, | |
| "valid_targets_mean": 3971.8, | |
| "valid_targets_min": 2000 | |
| }, | |
| { | |
| "epoch": 0.6218905472636815, | |
| "grad_norm": 0.6597104835502321, | |
| "learning_rate": 3.531914893617022e-05, | |
| "loss": 0.2968, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2824779152870178, | |
| "step": 250, | |
| "valid_targets_mean": 3968.2, | |
| "valid_targets_min": 1270 | |
| }, | |
| { | |
| "epoch": 0.6343283582089553, | |
| "grad_norm": 0.740278069571726, | |
| "learning_rate": 3.602836879432624e-05, | |
| "loss": 0.2997, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2923567593097687, | |
| "step": 255, | |
| "valid_targets_mean": 3380.2, | |
| "valid_targets_min": 2111 | |
| }, | |
| { | |
| "epoch": 0.6467661691542289, | |
| "grad_norm": 0.6593440154558078, | |
| "learning_rate": 3.673758865248227e-05, | |
| "loss": 0.3098, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.315481960773468, | |
| "step": 260, | |
| "valid_targets_mean": 3845.1, | |
| "valid_targets_min": 1793 | |
| }, | |
| { | |
| "epoch": 0.6592039800995025, | |
| "grad_norm": 0.5518441987402686, | |
| "learning_rate": 3.74468085106383e-05, | |
| "loss": 0.2894, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2802160084247589, | |
| "step": 265, | |
| "valid_targets_mean": 4580.6, | |
| "valid_targets_min": 2473 | |
| }, | |
| { | |
| "epoch": 0.6716417910447762, | |
| "grad_norm": 0.8104837314241858, | |
| "learning_rate": 3.815602836879433e-05, | |
| "loss": 0.3062, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3234109878540039, | |
| "step": 270, | |
| "valid_targets_mean": 3906.8, | |
| "valid_targets_min": 2167 | |
| }, | |
| { | |
| "epoch": 0.6840796019900498, | |
| "grad_norm": 0.574610630199438, | |
| "learning_rate": 3.8865248226950355e-05, | |
| "loss": 0.2879, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2707560956478119, | |
| "step": 275, | |
| "valid_targets_mean": 4401.9, | |
| "valid_targets_min": 2550 | |
| }, | |
| { | |
| "epoch": 0.6965174129353234, | |
| "grad_norm": 0.6473677332767744, | |
| "learning_rate": 3.9574468085106385e-05, | |
| "loss": 0.2886, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29438745975494385, | |
| "step": 280, | |
| "valid_targets_mean": 4070.2, | |
| "valid_targets_min": 2058 | |
| }, | |
| { | |
| "epoch": 0.7089552238805971, | |
| "grad_norm": 0.6669914526756555, | |
| "learning_rate": 3.999993842107385e-05, | |
| "loss": 0.3014, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29622241854667664, | |
| "step": 285, | |
| "valid_targets_mean": 3670.9, | |
| "valid_targets_min": 1575 | |
| }, | |
| { | |
| "epoch": 0.7213930348258707, | |
| "grad_norm": 0.6040016655187556, | |
| "learning_rate": 3.999924566250946e-05, | |
| "loss": 0.2847, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25055813789367676, | |
| "step": 290, | |
| "valid_targets_mean": 4318.6, | |
| "valid_targets_min": 2013 | |
| }, | |
| { | |
| "epoch": 0.7338308457711443, | |
| "grad_norm": 0.6412537411386398, | |
| "learning_rate": 3.999778319847388e-05, | |
| "loss": 0.275, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2983705401420593, | |
| "step": 295, | |
| "valid_targets_mean": 4027.8, | |
| "valid_targets_min": 1756 | |
| }, | |
| { | |
| "epoch": 0.746268656716418, | |
| "grad_norm": 0.7602229991895589, | |
| "learning_rate": 3.999555108525255e-05, | |
| "loss": 0.2855, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2838355004787445, | |
| "step": 300, | |
| "valid_targets_mean": 4391.4, | |
| "valid_targets_min": 1883 | |
| }, | |
| { | |
| "epoch": 0.7587064676616916, | |
| "grad_norm": 0.5443397707250325, | |
| "learning_rate": 3.999254940875221e-05, | |
| "loss": 0.2941, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30530381202697754, | |
| "step": 305, | |
| "valid_targets_mean": 4607.9, | |
| "valid_targets_min": 1976 | |
| }, | |
| { | |
| "epoch": 0.7711442786069652, | |
| "grad_norm": 0.7644982978652062, | |
| "learning_rate": 3.998877828449755e-05, | |
| "loss": 0.2889, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29389941692352295, | |
| "step": 310, | |
| "valid_targets_mean": 3338.8, | |
| "valid_targets_min": 1997 | |
| }, | |
| { | |
| "epoch": 0.7835820895522388, | |
| "grad_norm": 0.6097921931378489, | |
| "learning_rate": 3.99842378576268e-05, | |
| "loss": 0.2791, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2719549834728241, | |
| "step": 315, | |
| "valid_targets_mean": 4149.0, | |
| "valid_targets_min": 2106 | |
| }, | |
| { | |
| "epoch": 0.7960199004975125, | |
| "grad_norm": 0.6026543056079762, | |
| "learning_rate": 3.997892830288611e-05, | |
| "loss": 0.2763, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28515109419822693, | |
| "step": 320, | |
| "valid_targets_mean": 3764.9, | |
| "valid_targets_min": 2243 | |
| }, | |
| { | |
| "epoch": 0.8084577114427861, | |
| "grad_norm": 0.7013246853914047, | |
| "learning_rate": 3.997284982462286e-05, | |
| "loss": 0.3086, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32851818203926086, | |
| "step": 325, | |
| "valid_targets_mean": 3250.0, | |
| "valid_targets_min": 196 | |
| }, | |
| { | |
| "epoch": 0.8208955223880597, | |
| "grad_norm": 0.6272103188781436, | |
| "learning_rate": 3.9966002656777775e-05, | |
| "loss": 0.2838, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2576582133769989, | |
| "step": 330, | |
| "valid_targets_mean": 3494.9, | |
| "valid_targets_min": 471 | |
| }, | |
| { | |
| "epoch": 0.8333333333333334, | |
| "grad_norm": 0.6106794281804707, | |
| "learning_rate": 3.9958387062875924e-05, | |
| "loss": 0.2803, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28204280138015747, | |
| "step": 335, | |
| "valid_targets_mean": 3829.8, | |
| "valid_targets_min": 1935 | |
| }, | |
| { | |
| "epoch": 0.845771144278607, | |
| "grad_norm": 0.5489443050050972, | |
| "learning_rate": 3.9950003336016564e-05, | |
| "loss": 0.2772, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2528287172317505, | |
| "step": 340, | |
| "valid_targets_mean": 4614.7, | |
| "valid_targets_min": 2051 | |
| }, | |
| { | |
| "epoch": 0.8582089552238806, | |
| "grad_norm": 0.5465751723442666, | |
| "learning_rate": 3.99408517988619e-05, | |
| "loss": 0.2654, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26588577032089233, | |
| "step": 345, | |
| "valid_targets_mean": 4694.1, | |
| "valid_targets_min": 2446 | |
| }, | |
| { | |
| "epoch": 0.8706467661691543, | |
| "grad_norm": 0.5320142976407558, | |
| "learning_rate": 3.993093280362462e-05, | |
| "loss": 0.2714, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25479674339294434, | |
| "step": 350, | |
| "valid_targets_mean": 4793.0, | |
| "valid_targets_min": 2890 | |
| }, | |
| { | |
| "epoch": 0.8830845771144279, | |
| "grad_norm": 0.643787133545081, | |
| "learning_rate": 3.9920246732054374e-05, | |
| "loss": 0.2751, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2818455398082733, | |
| "step": 355, | |
| "valid_targets_mean": 4366.5, | |
| "valid_targets_min": 1670 | |
| }, | |
| { | |
| "epoch": 0.8955223880597015, | |
| "grad_norm": 0.6844127977814671, | |
| "learning_rate": 3.990879399542305e-05, | |
| "loss": 0.2673, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2798788845539093, | |
| "step": 360, | |
| "valid_targets_mean": 3420.6, | |
| "valid_targets_min": 1241 | |
| }, | |
| { | |
| "epoch": 0.9079601990049752, | |
| "grad_norm": 0.5798388818266013, | |
| "learning_rate": 3.989657503450898e-05, | |
| "loss": 0.2859, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28030702471733093, | |
| "step": 365, | |
| "valid_targets_mean": 4205.9, | |
| "valid_targets_min": 2328 | |
| }, | |
| { | |
| "epoch": 0.9203980099502488, | |
| "grad_norm": 0.5693045180329982, | |
| "learning_rate": 3.9883590319579966e-05, | |
| "loss": 0.2952, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3048206567764282, | |
| "step": 370, | |
| "valid_targets_mean": 4680.9, | |
| "valid_targets_min": 2049 | |
| }, | |
| { | |
| "epoch": 0.9328358208955224, | |
| "grad_norm": 0.77488630073332, | |
| "learning_rate": 3.986984035037514e-05, | |
| "loss": 0.2759, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2770192325115204, | |
| "step": 375, | |
| "valid_targets_mean": 3701.6, | |
| "valid_targets_min": 374 | |
| }, | |
| { | |
| "epoch": 0.945273631840796, | |
| "grad_norm": 0.5726365998589211, | |
| "learning_rate": 3.9855325656085815e-05, | |
| "loss": 0.279, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29798316955566406, | |
| "step": 380, | |
| "valid_targets_mean": 4057.5, | |
| "valid_targets_min": 1639 | |
| }, | |
| { | |
| "epoch": 0.9577114427860697, | |
| "grad_norm": 0.5794218327334973, | |
| "learning_rate": 3.984004679533502e-05, | |
| "loss": 0.2721, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2974611520767212, | |
| "step": 385, | |
| "valid_targets_mean": 4386.4, | |
| "valid_targets_min": 1244 | |
| }, | |
| { | |
| "epoch": 0.9701492537313433, | |
| "grad_norm": 0.6200804315597975, | |
| "learning_rate": 3.982400435615608e-05, | |
| "loss": 0.2654, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26094967126846313, | |
| "step": 390, | |
| "valid_targets_mean": 3871.4, | |
| "valid_targets_min": 2061 | |
| }, | |
| { | |
| "epoch": 0.9825870646766169, | |
| "grad_norm": 0.5663415236256595, | |
| "learning_rate": 3.980719895596994e-05, | |
| "loss": 0.2679, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2790575921535492, | |
| "step": 395, | |
| "valid_targets_mean": 4268.0, | |
| "valid_targets_min": 819 | |
| }, | |
| { | |
| "epoch": 0.9950248756218906, | |
| "grad_norm": 0.5335095669700068, | |
| "learning_rate": 3.978963124156141e-05, | |
| "loss": 0.2692, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25512856245040894, | |
| "step": 400, | |
| "valid_targets_mean": 4742.2, | |
| "valid_targets_min": 2403 | |
| }, | |
| { | |
| "epoch": 1.007462686567164, | |
| "grad_norm": 0.6706949371800399, | |
| "learning_rate": 3.977130188905429e-05, | |
| "loss": 0.2671, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25862938165664673, | |
| "step": 405, | |
| "valid_targets_mean": 3975.2, | |
| "valid_targets_min": 1886 | |
| }, | |
| { | |
| "epoch": 1.0199004975124377, | |
| "grad_norm": 0.6198638973234687, | |
| "learning_rate": 3.975221160388535e-05, | |
| "loss": 0.2586, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2816004753112793, | |
| "step": 410, | |
| "valid_targets_mean": 4115.3, | |
| "valid_targets_min": 1240 | |
| }, | |
| { | |
| "epoch": 1.0323383084577114, | |
| "grad_norm": 0.5477295121301582, | |
| "learning_rate": 3.973236112077712e-05, | |
| "loss": 0.2449, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2588885724544525, | |
| "step": 415, | |
| "valid_targets_mean": 4426.1, | |
| "valid_targets_min": 2062 | |
| }, | |
| { | |
| "epoch": 1.044776119402985, | |
| "grad_norm": 0.6101761931142692, | |
| "learning_rate": 3.971175120370971e-05, | |
| "loss": 0.2552, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26513034105300903, | |
| "step": 420, | |
| "valid_targets_mean": 4070.8, | |
| "valid_targets_min": 1988 | |
| }, | |
| { | |
| "epoch": 1.0572139303482586, | |
| "grad_norm": 0.5897683101564732, | |
| "learning_rate": 3.969038264589132e-05, | |
| "loss": 0.2528, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25092267990112305, | |
| "step": 425, | |
| "valid_targets_mean": 4073.1, | |
| "valid_targets_min": 443 | |
| }, | |
| { | |
| "epoch": 1.0696517412935322, | |
| "grad_norm": 0.5625409270111561, | |
| "learning_rate": 3.966825626972777e-05, | |
| "loss": 0.2694, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2630409002304077, | |
| "step": 430, | |
| "valid_targets_mean": 4102.7, | |
| "valid_targets_min": 1379 | |
| }, | |
| { | |
| "epoch": 1.0820895522388059, | |
| "grad_norm": 0.565867121844965, | |
| "learning_rate": 3.964537292679081e-05, | |
| "loss": 0.2529, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2534593641757965, | |
| "step": 435, | |
| "valid_targets_mean": 4240.8, | |
| "valid_targets_min": 1908 | |
| }, | |
| { | |
| "epoch": 1.0945273631840795, | |
| "grad_norm": 0.6406440477946179, | |
| "learning_rate": 3.962173349778538e-05, | |
| "loss": 0.2594, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28820428252220154, | |
| "step": 440, | |
| "valid_targets_mean": 3382.1, | |
| "valid_targets_min": 1769 | |
| }, | |
| { | |
| "epoch": 1.1069651741293531, | |
| "grad_norm": 0.5787142049618801, | |
| "learning_rate": 3.959733889251569e-05, | |
| "loss": 0.2726, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2697809636592865, | |
| "step": 445, | |
| "valid_targets_mean": 4141.3, | |
| "valid_targets_min": 1130 | |
| }, | |
| { | |
| "epoch": 1.1194029850746268, | |
| "grad_norm": 0.5835970548274867, | |
| "learning_rate": 3.9572190049850186e-05, | |
| "loss": 0.2645, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2650759518146515, | |
| "step": 450, | |
| "valid_targets_mean": 3681.7, | |
| "valid_targets_min": 1929 | |
| }, | |
| { | |
| "epoch": 1.1318407960199006, | |
| "grad_norm": 0.6578060953434757, | |
| "learning_rate": 3.9546287937685485e-05, | |
| "loss": 0.2656, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2592794895172119, | |
| "step": 455, | |
| "valid_targets_mean": 3309.7, | |
| "valid_targets_min": 1400 | |
| }, | |
| { | |
| "epoch": 1.144278606965174, | |
| "grad_norm": 0.5127241563740503, | |
| "learning_rate": 3.9519633552909054e-05, | |
| "loss": 0.2541, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23262007534503937, | |
| "step": 460, | |
| "valid_targets_mean": 5108.5, | |
| "valid_targets_min": 2353 | |
| }, | |
| { | |
| "epoch": 1.1567164179104479, | |
| "grad_norm": 0.6164622352052561, | |
| "learning_rate": 3.949222792136087e-05, | |
| "loss": 0.2472, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23169252276420593, | |
| "step": 465, | |
| "valid_targets_mean": 4548.3, | |
| "valid_targets_min": 1282 | |
| }, | |
| { | |
| "epoch": 1.1691542288557213, | |
| "grad_norm": 0.6496210536118021, | |
| "learning_rate": 3.946407209779395e-05, | |
| "loss": 0.264, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2703050374984741, | |
| "step": 470, | |
| "valid_targets_mean": 3854.2, | |
| "valid_targets_min": 2534 | |
| }, | |
| { | |
| "epoch": 1.1815920398009951, | |
| "grad_norm": 0.6983143179255165, | |
| "learning_rate": 3.9435167165833724e-05, | |
| "loss": 0.2581, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26550132036209106, | |
| "step": 475, | |
| "valid_targets_mean": 3896.3, | |
| "valid_targets_min": 2130 | |
| }, | |
| { | |
| "epoch": 1.1940298507462686, | |
| "grad_norm": 0.5555184412354105, | |
| "learning_rate": 3.940551423793638e-05, | |
| "loss": 0.2603, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2535971403121948, | |
| "step": 480, | |
| "valid_targets_mean": 4125.7, | |
| "valid_targets_min": 1715 | |
| }, | |
| { | |
| "epoch": 1.2064676616915424, | |
| "grad_norm": 0.5704921624189617, | |
| "learning_rate": 3.937511445534599e-05, | |
| "loss": 0.2525, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26805728673934937, | |
| "step": 485, | |
| "valid_targets_mean": 3877.7, | |
| "valid_targets_min": 2189 | |
| }, | |
| { | |
| "epoch": 1.2189054726368158, | |
| "grad_norm": 0.6495765923740222, | |
| "learning_rate": 3.934396898805064e-05, | |
| "loss": 0.2544, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24718452990055084, | |
| "step": 490, | |
| "valid_targets_mean": 3588.3, | |
| "valid_targets_min": 1692 | |
| }, | |
| { | |
| "epoch": 1.2313432835820897, | |
| "grad_norm": 0.5876505289888774, | |
| "learning_rate": 3.931207903473737e-05, | |
| "loss": 0.2641, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2569234371185303, | |
| "step": 495, | |
| "valid_targets_mean": 4021.1, | |
| "valid_targets_min": 1943 | |
| }, | |
| { | |
| "epoch": 1.243781094527363, | |
| "grad_norm": 0.5703559162652219, | |
| "learning_rate": 3.9279445822746045e-05, | |
| "loss": 0.2438, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23905381560325623, | |
| "step": 500, | |
| "valid_targets_mean": 3682.4, | |
| "valid_targets_min": 1850 | |
| }, | |
| { | |
| "epoch": 1.256218905472637, | |
| "grad_norm": 0.6506750265611713, | |
| "learning_rate": 3.9246070608022125e-05, | |
| "loss": 0.263, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26591765880584717, | |
| "step": 505, | |
| "valid_targets_mean": 3393.1, | |
| "valid_targets_min": 2128 | |
| }, | |
| { | |
| "epoch": 1.2686567164179103, | |
| "grad_norm": 0.5787258957804087, | |
| "learning_rate": 3.921195467506833e-05, | |
| "loss": 0.2645, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2495618611574173, | |
| "step": 510, | |
| "valid_targets_mean": 3888.8, | |
| "valid_targets_min": 1134 | |
| }, | |
| { | |
| "epoch": 1.2810945273631842, | |
| "grad_norm": 0.519436549423975, | |
| "learning_rate": 3.917709933689519e-05, | |
| "loss": 0.2624, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26761433482170105, | |
| "step": 515, | |
| "valid_targets_mean": 4347.1, | |
| "valid_targets_min": 1773 | |
| }, | |
| { | |
| "epoch": 1.2935323383084576, | |
| "grad_norm": 0.6076968588802545, | |
| "learning_rate": 3.914150593497054e-05, | |
| "loss": 0.2459, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2475341260433197, | |
| "step": 520, | |
| "valid_targets_mean": 3474.6, | |
| "valid_targets_min": 777 | |
| }, | |
| { | |
| "epoch": 1.3059701492537314, | |
| "grad_norm": 0.6306744703358103, | |
| "learning_rate": 3.910517583916783e-05, | |
| "loss": 0.2565, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28335511684417725, | |
| "step": 525, | |
| "valid_targets_mean": 3507.3, | |
| "valid_targets_min": 1850 | |
| }, | |
| { | |
| "epoch": 1.3184079601990049, | |
| "grad_norm": 0.5110706633146148, | |
| "learning_rate": 3.90681104477135e-05, | |
| "loss": 0.2519, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23450972139835358, | |
| "step": 530, | |
| "valid_targets_mean": 4537.1, | |
| "valid_targets_min": 1051 | |
| }, | |
| { | |
| "epoch": 1.3308457711442787, | |
| "grad_norm": 0.6389986218399706, | |
| "learning_rate": 3.903031118713307e-05, | |
| "loss": 0.2555, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26319772005081177, | |
| "step": 535, | |
| "valid_targets_mean": 3198.9, | |
| "valid_targets_min": 1306 | |
| }, | |
| { | |
| "epoch": 1.3432835820895521, | |
| "grad_norm": 0.6465328887813366, | |
| "learning_rate": 3.8991779512196294e-05, | |
| "loss": 0.2685, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25991159677505493, | |
| "step": 540, | |
| "valid_targets_mean": 3375.8, | |
| "valid_targets_min": 1682 | |
| }, | |
| { | |
| "epoch": 1.355721393034826, | |
| "grad_norm": 0.5736019923931159, | |
| "learning_rate": 3.8952516905861155e-05, | |
| "loss": 0.2565, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2686164379119873, | |
| "step": 545, | |
| "valid_targets_mean": 4258.2, | |
| "valid_targets_min": 1889 | |
| }, | |
| { | |
| "epoch": 1.3681592039800994, | |
| "grad_norm": 0.5067100499995646, | |
| "learning_rate": 3.89125248792168e-05, | |
| "loss": 0.2426, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23678192496299744, | |
| "step": 550, | |
| "valid_targets_mean": 4387.9, | |
| "valid_targets_min": 1724 | |
| }, | |
| { | |
| "epoch": 1.3805970149253732, | |
| "grad_norm": 0.5477380240295115, | |
| "learning_rate": 3.8871804971425353e-05, | |
| "loss": 0.2521, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2501421570777893, | |
| "step": 555, | |
| "valid_targets_mean": 3848.6, | |
| "valid_targets_min": 1156 | |
| }, | |
| { | |
| "epoch": 1.3930348258706466, | |
| "grad_norm": 0.5863449356154877, | |
| "learning_rate": 3.883035874966273e-05, | |
| "loss": 0.2547, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24096009135246277, | |
| "step": 560, | |
| "valid_targets_mean": 3734.2, | |
| "valid_targets_min": 1872 | |
| }, | |
| { | |
| "epoch": 1.4054726368159205, | |
| "grad_norm": 0.5379477926228984, | |
| "learning_rate": 3.878818780905826e-05, | |
| "loss": 0.2509, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23347872495651245, | |
| "step": 565, | |
| "valid_targets_mean": 4070.9, | |
| "valid_targets_min": 2509 | |
| }, | |
| { | |
| "epoch": 1.417910447761194, | |
| "grad_norm": 0.5419932690009873, | |
| "learning_rate": 3.874529377263335e-05, | |
| "loss": 0.2626, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25849100947380066, | |
| "step": 570, | |
| "valid_targets_mean": 4307.4, | |
| "valid_targets_min": 1746 | |
| }, | |
| { | |
| "epoch": 1.4303482587064678, | |
| "grad_norm": 0.5729702092975528, | |
| "learning_rate": 3.870167829123899e-05, | |
| "loss": 0.262, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2920796871185303, | |
| "step": 575, | |
| "valid_targets_mean": 4220.1, | |
| "valid_targets_min": 1363 | |
| }, | |
| { | |
| "epoch": 1.4427860696517412, | |
| "grad_norm": 0.5205776739120929, | |
| "learning_rate": 3.865734304349224e-05, | |
| "loss": 0.2458, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23339340090751648, | |
| "step": 580, | |
| "valid_targets_mean": 4367.3, | |
| "valid_targets_min": 1685 | |
| }, | |
| { | |
| "epoch": 1.455223880597015, | |
| "grad_norm": 0.5536951564209288, | |
| "learning_rate": 3.861228973571158e-05, | |
| "loss": 0.2489, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24707631766796112, | |
| "step": 585, | |
| "valid_targets_mean": 4527.9, | |
| "valid_targets_min": 1406 | |
| }, | |
| { | |
| "epoch": 1.4676616915422884, | |
| "grad_norm": 0.564511967740949, | |
| "learning_rate": 3.856652010185128e-05, | |
| "loss": 0.2538, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2429649531841278, | |
| "step": 590, | |
| "valid_targets_mean": 3914.8, | |
| "valid_targets_min": 2402 | |
| }, | |
| { | |
| "epoch": 1.4800995024875623, | |
| "grad_norm": 0.5579953289935093, | |
| "learning_rate": 3.852003590343467e-05, | |
| "loss": 0.2442, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21945007145404816, | |
| "step": 595, | |
| "valid_targets_mean": 4183.3, | |
| "valid_targets_min": 1280 | |
| }, | |
| { | |
| "epoch": 1.4925373134328357, | |
| "grad_norm": 0.5631837154917557, | |
| "learning_rate": 3.847283892948631e-05, | |
| "loss": 0.2541, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27015361189842224, | |
| "step": 600, | |
| "valid_targets_mean": 3908.9, | |
| "valid_targets_min": 1638 | |
| }, | |
| { | |
| "epoch": 1.5049751243781095, | |
| "grad_norm": 0.5901864513013043, | |
| "learning_rate": 3.8424930996463173e-05, | |
| "loss": 0.2565, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25428202748298645, | |
| "step": 605, | |
| "valid_targets_mean": 4185.3, | |
| "valid_targets_min": 1322 | |
| }, | |
| { | |
| "epoch": 1.517412935323383, | |
| "grad_norm": 0.5161889221577463, | |
| "learning_rate": 3.837631394818471e-05, | |
| "loss": 0.2399, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2310006320476532, | |
| "step": 610, | |
| "valid_targets_mean": 4516.8, | |
| "valid_targets_min": 1988 | |
| }, | |
| { | |
| "epoch": 1.5298507462686568, | |
| "grad_norm": 0.5359807008019226, | |
| "learning_rate": 3.832698965576189e-05, | |
| "loss": 0.256, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2637181878089905, | |
| "step": 615, | |
| "valid_targets_mean": 3867.1, | |
| "valid_targets_min": 2465 | |
| }, | |
| { | |
| "epoch": 1.5422885572139302, | |
| "grad_norm": 0.5674145217928909, | |
| "learning_rate": 3.8276960017525197e-05, | |
| "loss": 0.237, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22988495230674744, | |
| "step": 620, | |
| "valid_targets_mean": 4242.8, | |
| "valid_targets_min": 1588 | |
| }, | |
| { | |
| "epoch": 1.554726368159204, | |
| "grad_norm": 0.5723411242257961, | |
| "learning_rate": 3.822622695895157e-05, | |
| "loss": 0.2578, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25821536779403687, | |
| "step": 625, | |
| "valid_targets_mean": 4392.1, | |
| "valid_targets_min": 1765 | |
| }, | |
| { | |
| "epoch": 1.5671641791044775, | |
| "grad_norm": 0.5376996090192159, | |
| "learning_rate": 3.8174792432590294e-05, | |
| "loss": 0.2573, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23149748146533966, | |
| "step": 630, | |
| "valid_targets_mean": 4030.4, | |
| "valid_targets_min": 1742 | |
| }, | |
| { | |
| "epoch": 1.5796019900497513, | |
| "grad_norm": 0.5681703694899468, | |
| "learning_rate": 3.8122658417987854e-05, | |
| "loss": 0.2472, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24532127380371094, | |
| "step": 635, | |
| "valid_targets_mean": 3592.2, | |
| "valid_targets_min": 963 | |
| }, | |
| { | |
| "epoch": 1.5920398009950247, | |
| "grad_norm": 0.5672170181926013, | |
| "learning_rate": 3.8069826921611736e-05, | |
| "loss": 0.2527, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25750595331192017, | |
| "step": 640, | |
| "valid_targets_mean": 3691.6, | |
| "valid_targets_min": 2389 | |
| }, | |
| { | |
| "epoch": 1.6044776119402986, | |
| "grad_norm": 0.5840248420927169, | |
| "learning_rate": 3.8016299976773215e-05, | |
| "loss": 0.2479, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24948638677597046, | |
| "step": 645, | |
| "valid_targets_mean": 3554.7, | |
| "valid_targets_min": 2311 | |
| }, | |
| { | |
| "epoch": 1.616915422885572, | |
| "grad_norm": 0.5359793017604382, | |
| "learning_rate": 3.796207964354911e-05, | |
| "loss": 0.2419, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.236485093832016, | |
| "step": 650, | |
| "valid_targets_mean": 4295.7, | |
| "valid_targets_min": 2176 | |
| }, | |
| { | |
| "epoch": 1.6293532338308458, | |
| "grad_norm": 0.5407696413708218, | |
| "learning_rate": 3.7907168008702485e-05, | |
| "loss": 0.2452, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24842476844787598, | |
| "step": 655, | |
| "valid_targets_mean": 4364.2, | |
| "valid_targets_min": 1789 | |
| }, | |
| { | |
| "epoch": 1.6417910447761193, | |
| "grad_norm": 0.5209593522870282, | |
| "learning_rate": 3.785156718560234e-05, | |
| "loss": 0.2522, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23498183488845825, | |
| "step": 660, | |
| "valid_targets_mean": 4318.7, | |
| "valid_targets_min": 2193 | |
| }, | |
| { | |
| "epoch": 1.654228855721393, | |
| "grad_norm": 0.550433073135447, | |
| "learning_rate": 3.779527931414227e-05, | |
| "loss": 0.2384, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22987571358680725, | |
| "step": 665, | |
| "valid_targets_mean": 4133.3, | |
| "valid_targets_min": 1619 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 0.5574429313585283, | |
| "learning_rate": 3.773830656065811e-05, | |
| "loss": 0.249, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2494204193353653, | |
| "step": 670, | |
| "valid_targets_mean": 4016.2, | |
| "valid_targets_min": 2044 | |
| }, | |
| { | |
| "epoch": 1.6791044776119404, | |
| "grad_norm": 0.6329586119901996, | |
| "learning_rate": 3.768065111784457e-05, | |
| "loss": 0.2474, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2279907613992691, | |
| "step": 675, | |
| "valid_targets_mean": 3598.1, | |
| "valid_targets_min": 819 | |
| }, | |
| { | |
| "epoch": 1.6915422885572138, | |
| "grad_norm": 0.5334914217295829, | |
| "learning_rate": 3.762231520467082e-05, | |
| "loss": 0.2487, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2509036362171173, | |
| "step": 680, | |
| "valid_targets_mean": 3799.6, | |
| "valid_targets_min": 1225 | |
| }, | |
| { | |
| "epoch": 1.7039800995024876, | |
| "grad_norm": 0.5489580465491168, | |
| "learning_rate": 3.7563301066295144e-05, | |
| "loss": 0.2369, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2305251657962799, | |
| "step": 685, | |
| "valid_targets_mean": 3473.2, | |
| "valid_targets_min": 992 | |
| }, | |
| { | |
| "epoch": 1.716417910447761, | |
| "grad_norm": 0.5288516281459692, | |
| "learning_rate": 3.750361097397844e-05, | |
| "loss": 0.2382, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23850487172603607, | |
| "step": 690, | |
| "valid_targets_mean": 4001.9, | |
| "valid_targets_min": 1741 | |
| }, | |
| { | |
| "epoch": 1.728855721393035, | |
| "grad_norm": 0.5800907388120042, | |
| "learning_rate": 3.74432472249969e-05, | |
| "loss": 0.2438, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25247111916542053, | |
| "step": 695, | |
| "valid_targets_mean": 3964.8, | |
| "valid_targets_min": 1725 | |
| }, | |
| { | |
| "epoch": 1.7412935323383083, | |
| "grad_norm": 0.5153130568766624, | |
| "learning_rate": 3.7382212142553526e-05, | |
| "loss": 0.2424, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23832815885543823, | |
| "step": 700, | |
| "valid_targets_mean": 5047.1, | |
| "valid_targets_min": 2105 | |
| }, | |
| { | |
| "epoch": 1.7537313432835822, | |
| "grad_norm": 0.4775135414376218, | |
| "learning_rate": 3.732050807568878e-05, | |
| "loss": 0.2472, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2324645072221756, | |
| "step": 705, | |
| "valid_targets_mean": 5206.6, | |
| "valid_targets_min": 2405 | |
| }, | |
| { | |
| "epoch": 1.7661691542288556, | |
| "grad_norm": 0.6038057766815212, | |
| "learning_rate": 3.7258137399190104e-05, | |
| "loss": 0.247, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23930123448371887, | |
| "step": 710, | |
| "valid_targets_mean": 4152.9, | |
| "valid_targets_min": 1787 | |
| }, | |
| { | |
| "epoch": 1.7786069651741294, | |
| "grad_norm": 0.5565167118827314, | |
| "learning_rate": 3.71951025135006e-05, | |
| "loss": 0.2629, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2660183310508728, | |
| "step": 715, | |
| "valid_targets_mean": 3864.9, | |
| "valid_targets_min": 1404 | |
| }, | |
| { | |
| "epoch": 1.7910447761194028, | |
| "grad_norm": 0.6001026329663603, | |
| "learning_rate": 3.713140584462659e-05, | |
| "loss": 0.2531, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2475225031375885, | |
| "step": 720, | |
| "valid_targets_mean": 3926.4, | |
| "valid_targets_min": 196 | |
| }, | |
| { | |
| "epoch": 1.8034825870646767, | |
| "grad_norm": 0.5652238036041524, | |
| "learning_rate": 3.7067049844044246e-05, | |
| "loss": 0.2492, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24355122447013855, | |
| "step": 725, | |
| "valid_targets_mean": 4200.6, | |
| "valid_targets_min": 2117 | |
| }, | |
| { | |
| "epoch": 1.81592039800995, | |
| "grad_norm": 0.5488019914228596, | |
| "learning_rate": 3.700203698860528e-05, | |
| "loss": 0.2396, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2313595861196518, | |
| "step": 730, | |
| "valid_targets_mean": 3598.3, | |
| "valid_targets_min": 2223 | |
| }, | |
| { | |
| "epoch": 1.828358208955224, | |
| "grad_norm": 0.51796472681004, | |
| "learning_rate": 3.6936369780441605e-05, | |
| "loss": 0.2562, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24363797903060913, | |
| "step": 735, | |
| "valid_targets_mean": 4339.6, | |
| "valid_targets_min": 2214 | |
| }, | |
| { | |
| "epoch": 1.8407960199004973, | |
| "grad_norm": 0.5659545424872905, | |
| "learning_rate": 3.6870050746869e-05, | |
| "loss": 0.242, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24636013805866241, | |
| "step": 740, | |
| "valid_targets_mean": 4091.0, | |
| "valid_targets_min": 1926 | |
| }, | |
| { | |
| "epoch": 1.8532338308457712, | |
| "grad_norm": 0.5096718996130041, | |
| "learning_rate": 3.680308244028988e-05, | |
| "loss": 0.2544, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21994280815124512, | |
| "step": 745, | |
| "valid_targets_mean": 4172.7, | |
| "valid_targets_min": 1149 | |
| }, | |
| { | |
| "epoch": 1.8656716417910446, | |
| "grad_norm": 0.5065361527427243, | |
| "learning_rate": 3.673546743809507e-05, | |
| "loss": 0.2508, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23863369226455688, | |
| "step": 750, | |
| "valid_targets_mean": 4798.6, | |
| "valid_targets_min": 2111 | |
| }, | |
| { | |
| "epoch": 1.8781094527363185, | |
| "grad_norm": 0.5931017170469689, | |
| "learning_rate": 3.666720834256456e-05, | |
| "loss": 0.2463, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2788572311401367, | |
| "step": 755, | |
| "valid_targets_mean": 4347.8, | |
| "valid_targets_min": 2099 | |
| }, | |
| { | |
| "epoch": 1.890547263681592, | |
| "grad_norm": 0.5455237703990018, | |
| "learning_rate": 3.659830778076741e-05, | |
| "loss": 0.2609, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23518797755241394, | |
| "step": 760, | |
| "valid_targets_mean": 4198.4, | |
| "valid_targets_min": 2807 | |
| }, | |
| { | |
| "epoch": 1.9029850746268657, | |
| "grad_norm": 0.5393054024523214, | |
| "learning_rate": 3.65287684044606e-05, | |
| "loss": 0.2464, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2718955874443054, | |
| "step": 765, | |
| "valid_targets_mean": 3854.9, | |
| "valid_targets_min": 2330 | |
| }, | |
| { | |
| "epoch": 1.9154228855721394, | |
| "grad_norm": 0.8356397481562335, | |
| "learning_rate": 3.6458592889986986e-05, | |
| "loss": 0.2567, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25400418043136597, | |
| "step": 770, | |
| "valid_targets_mean": 4668.4, | |
| "valid_targets_min": 2281 | |
| }, | |
| { | |
| "epoch": 1.927860696517413, | |
| "grad_norm": 0.5065732254922442, | |
| "learning_rate": 3.638778393817233e-05, | |
| "loss": 0.2428, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2371484339237213, | |
| "step": 775, | |
| "valid_targets_mean": 4130.5, | |
| "valid_targets_min": 2548 | |
| }, | |
| { | |
| "epoch": 1.9402985074626866, | |
| "grad_norm": 0.5421381515387088, | |
| "learning_rate": 3.6316344274221276e-05, | |
| "loss": 0.2365, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25802528858184814, | |
| "step": 780, | |
| "valid_targets_mean": 4095.2, | |
| "valid_targets_min": 2527 | |
| }, | |
| { | |
| "epoch": 1.9527363184079602, | |
| "grad_norm": 0.5365802561261709, | |
| "learning_rate": 3.624427664761254e-05, | |
| "loss": 0.245, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23089924454689026, | |
| "step": 785, | |
| "valid_targets_mean": 3962.5, | |
| "valid_targets_min": 2058 | |
| }, | |
| { | |
| "epoch": 1.9651741293532339, | |
| "grad_norm": 0.5643291821086128, | |
| "learning_rate": 3.6171583831993076e-05, | |
| "loss": 0.2442, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25705310702323914, | |
| "step": 790, | |
| "valid_targets_mean": 4278.8, | |
| "valid_targets_min": 2126 | |
| }, | |
| { | |
| "epoch": 1.9776119402985075, | |
| "grad_norm": 0.5790941804858589, | |
| "learning_rate": 3.609826862507128e-05, | |
| "loss": 0.252, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2411389946937561, | |
| "step": 795, | |
| "valid_targets_mean": 3969.7, | |
| "valid_targets_min": 2089 | |
| }, | |
| { | |
| "epoch": 1.9900497512437811, | |
| "grad_norm": 0.48846796864819103, | |
| "learning_rate": 3.6024333848509384e-05, | |
| "loss": 0.2391, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23043224215507507, | |
| "step": 800, | |
| "valid_targets_mean": 4327.9, | |
| "valid_targets_min": 1962 | |
| }, | |
| { | |
| "epoch": 2.0024875621890548, | |
| "grad_norm": 0.5849877152700085, | |
| "learning_rate": 3.594978234781481e-05, | |
| "loss": 0.2378, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23692670464515686, | |
| "step": 805, | |
| "valid_targets_mean": 3665.8, | |
| "valid_targets_min": 2020 | |
| }, | |
| { | |
| "epoch": 2.014925373134328, | |
| "grad_norm": 0.538247528051884, | |
| "learning_rate": 3.587461699223067e-05, | |
| "loss": 0.2283, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2252245843410492, | |
| "step": 810, | |
| "valid_targets_mean": 4359.8, | |
| "valid_targets_min": 1739 | |
| }, | |
| { | |
| "epoch": 2.027363184079602, | |
| "grad_norm": 0.5381268521719818, | |
| "learning_rate": 3.579884067462535e-05, | |
| "loss": 0.2275, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2488606870174408, | |
| "step": 815, | |
| "valid_targets_mean": 4062.9, | |
| "valid_targets_min": 2410 | |
| }, | |
| { | |
| "epoch": 2.0398009950248754, | |
| "grad_norm": 0.6115694003336873, | |
| "learning_rate": 3.572245631138116e-05, | |
| "loss": 0.2301, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2483055591583252, | |
| "step": 820, | |
| "valid_targets_mean": 3527.8, | |
| "valid_targets_min": 1282 | |
| }, | |
| { | |
| "epoch": 2.0522388059701493, | |
| "grad_norm": 0.5094263989797099, | |
| "learning_rate": 3.564546684228209e-05, | |
| "loss": 0.223, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1991591900587082, | |
| "step": 825, | |
| "valid_targets_mean": 4397.8, | |
| "valid_targets_min": 1706 | |
| }, | |
| { | |
| "epoch": 2.0646766169154227, | |
| "grad_norm": 0.5601385200195886, | |
| "learning_rate": 3.556787523040069e-05, | |
| "loss": 0.228, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2520599961280823, | |
| "step": 830, | |
| "valid_targets_mean": 3847.5, | |
| "valid_targets_min": 1319 | |
| }, | |
| { | |
| "epoch": 2.0771144278606966, | |
| "grad_norm": 0.5380726404824564, | |
| "learning_rate": 3.548968446198398e-05, | |
| "loss": 0.2174, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21053871512413025, | |
| "step": 835, | |
| "valid_targets_mean": 4242.5, | |
| "valid_targets_min": 973 | |
| }, | |
| { | |
| "epoch": 2.08955223880597, | |
| "grad_norm": 0.5025117595976427, | |
| "learning_rate": 3.54108975463386e-05, | |
| "loss": 0.2184, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2069125920534134, | |
| "step": 840, | |
| "valid_targets_mean": 4396.9, | |
| "valid_targets_min": 2427 | |
| }, | |
| { | |
| "epoch": 2.101990049751244, | |
| "grad_norm": 0.5354301465480238, | |
| "learning_rate": 3.533151751571489e-05, | |
| "loss": 0.2235, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2244517207145691, | |
| "step": 845, | |
| "valid_targets_mean": 3890.4, | |
| "valid_targets_min": 1859 | |
| }, | |
| { | |
| "epoch": 2.1144278606965172, | |
| "grad_norm": 0.5610413426394177, | |
| "learning_rate": 3.5251547425190294e-05, | |
| "loss": 0.2248, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24040347337722778, | |
| "step": 850, | |
| "valid_targets_mean": 4252.7, | |
| "valid_targets_min": 1379 | |
| }, | |
| { | |
| "epoch": 2.126865671641791, | |
| "grad_norm": 0.7445240143199634, | |
| "learning_rate": 3.51709903525517e-05, | |
| "loss": 0.2259, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22202104330062866, | |
| "step": 855, | |
| "valid_targets_mean": 3389.6, | |
| "valid_targets_min": 1472 | |
| }, | |
| { | |
| "epoch": 2.1393034825870645, | |
| "grad_norm": 0.5584087371143899, | |
| "learning_rate": 3.5089849398177013e-05, | |
| "loss": 0.237, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25943320989608765, | |
| "step": 860, | |
| "valid_targets_mean": 4247.4, | |
| "valid_targets_min": 1713 | |
| }, | |
| { | |
| "epoch": 2.1517412935323383, | |
| "grad_norm": 0.5832938350626173, | |
| "learning_rate": 3.500812768491586e-05, | |
| "loss": 0.2301, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2365805208683014, | |
| "step": 865, | |
| "valid_targets_mean": 4043.2, | |
| "valid_targets_min": 1548 | |
| }, | |
| { | |
| "epoch": 2.1641791044776117, | |
| "grad_norm": 0.5400590413107126, | |
| "learning_rate": 3.4925828357969344e-05, | |
| "loss": 0.2321, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21506357192993164, | |
| "step": 870, | |
| "valid_targets_mean": 3890.4, | |
| "valid_targets_min": 2165 | |
| }, | |
| { | |
| "epoch": 2.1766169154228856, | |
| "grad_norm": 0.5408725543370927, | |
| "learning_rate": 3.484295458476905e-05, | |
| "loss": 0.2301, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2199130356311798, | |
| "step": 875, | |
| "valid_targets_mean": 3965.0, | |
| "valid_targets_min": 1830 | |
| }, | |
| { | |
| "epoch": 2.189054726368159, | |
| "grad_norm": 0.5544609623772936, | |
| "learning_rate": 3.475950955485511e-05, | |
| "loss": 0.2344, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2255849838256836, | |
| "step": 880, | |
| "valid_targets_mean": 3941.9, | |
| "valid_targets_min": 1907 | |
| }, | |
| { | |
| "epoch": 2.201492537313433, | |
| "grad_norm": 0.5335810864251953, | |
| "learning_rate": 3.467549647975346e-05, | |
| "loss": 0.2318, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2269669473171234, | |
| "step": 885, | |
| "valid_targets_mean": 3739.8, | |
| "valid_targets_min": 1409 | |
| }, | |
| { | |
| "epoch": 2.2139303482587063, | |
| "grad_norm": 0.544212423789675, | |
| "learning_rate": 3.4590918592852214e-05, | |
| "loss": 0.2226, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22404810786247253, | |
| "step": 890, | |
| "valid_targets_mean": 3947.4, | |
| "valid_targets_min": 2505 | |
| }, | |
| { | |
| "epoch": 2.22636815920398, | |
| "grad_norm": 0.48777043633608497, | |
| "learning_rate": 3.450577914927728e-05, | |
| "loss": 0.2283, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2288726568222046, | |
| "step": 895, | |
| "valid_targets_mean": 4936.0, | |
| "valid_targets_min": 1097 | |
| }, | |
| { | |
| "epoch": 2.2388059701492535, | |
| "grad_norm": 0.5375629176173219, | |
| "learning_rate": 3.442008142576701e-05, | |
| "loss": 0.2226, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21632421016693115, | |
| "step": 900, | |
| "valid_targets_mean": 4131.2, | |
| "valid_targets_min": 2105 | |
| }, | |
| { | |
| "epoch": 2.2512437810945274, | |
| "grad_norm": 0.5159769756167506, | |
| "learning_rate": 3.433382872054614e-05, | |
| "loss": 0.2271, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24016112089157104, | |
| "step": 905, | |
| "valid_targets_mean": 4466.8, | |
| "valid_targets_min": 2033 | |
| }, | |
| { | |
| "epoch": 2.2636815920398012, | |
| "grad_norm": 0.5464064642346769, | |
| "learning_rate": 3.4247024353198826e-05, | |
| "loss": 0.2324, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23476913571357727, | |
| "step": 910, | |
| "valid_targets_mean": 4390.9, | |
| "valid_targets_min": 1993 | |
| }, | |
| { | |
| "epoch": 2.2761194029850746, | |
| "grad_norm": 0.5493349140412681, | |
| "learning_rate": 3.415967166454091e-05, | |
| "loss": 0.2232, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21979095041751862, | |
| "step": 915, | |
| "valid_targets_mean": 3712.5, | |
| "valid_targets_min": 853 | |
| }, | |
| { | |
| "epoch": 2.288557213930348, | |
| "grad_norm": 0.5988218138371607, | |
| "learning_rate": 3.4071774016491295e-05, | |
| "loss": 0.2299, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22979789972305298, | |
| "step": 920, | |
| "valid_targets_mean": 3520.1, | |
| "valid_targets_min": 1018 | |
| }, | |
| { | |
| "epoch": 2.300995024875622, | |
| "grad_norm": 0.6119392010234547, | |
| "learning_rate": 3.398333479194261e-05, | |
| "loss": 0.2291, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2250669300556183, | |
| "step": 925, | |
| "valid_targets_mean": 3029.0, | |
| "valid_targets_min": 1741 | |
| }, | |
| { | |
| "epoch": 2.3134328358208958, | |
| "grad_norm": 0.5738059500201181, | |
| "learning_rate": 3.389435739463099e-05, | |
| "loss": 0.2282, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22356033325195312, | |
| "step": 930, | |
| "valid_targets_mean": 3890.4, | |
| "valid_targets_min": 1947 | |
| }, | |
| { | |
| "epoch": 2.325870646766169, | |
| "grad_norm": 0.5200786239469339, | |
| "learning_rate": 3.380484524900506e-05, | |
| "loss": 0.2166, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.217308908700943, | |
| "step": 935, | |
| "valid_targets_mean": 4040.2, | |
| "valid_targets_min": 1742 | |
| }, | |
| { | |
| "epoch": 2.3383084577114426, | |
| "grad_norm": 0.5347052584780078, | |
| "learning_rate": 3.371480180009418e-05, | |
| "loss": 0.2142, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21474695205688477, | |
| "step": 940, | |
| "valid_targets_mean": 3967.4, | |
| "valid_targets_min": 1804 | |
| }, | |
| { | |
| "epoch": 2.3507462686567164, | |
| "grad_norm": 0.6357404551408977, | |
| "learning_rate": 3.362423051337581e-05, | |
| "loss": 0.2424, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2433660328388214, | |
| "step": 945, | |
| "valid_targets_mean": 3678.4, | |
| "valid_targets_min": 1041 | |
| }, | |
| { | |
| "epoch": 2.3631840796019903, | |
| "grad_norm": 0.5200162752679304, | |
| "learning_rate": 3.353313487464217e-05, | |
| "loss": 0.2204, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21824005246162415, | |
| "step": 950, | |
| "valid_targets_mean": 4523.8, | |
| "valid_targets_min": 2556 | |
| }, | |
| { | |
| "epoch": 2.3756218905472637, | |
| "grad_norm": 0.5521210931060869, | |
| "learning_rate": 3.3441518389866075e-05, | |
| "loss": 0.2221, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2285742461681366, | |
| "step": 955, | |
| "valid_targets_mean": 4061.0, | |
| "valid_targets_min": 933 | |
| }, | |
| { | |
| "epoch": 2.388059701492537, | |
| "grad_norm": 0.5509197209245766, | |
| "learning_rate": 3.334938458506599e-05, | |
| "loss": 0.2261, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24184472858905792, | |
| "step": 960, | |
| "valid_targets_mean": 4080.4, | |
| "valid_targets_min": 1530 | |
| }, | |
| { | |
| "epoch": 2.400497512437811, | |
| "grad_norm": 0.5236287052530746, | |
| "learning_rate": 3.325673700617035e-05, | |
| "loss": 0.2226, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20668141543865204, | |
| "step": 965, | |
| "valid_targets_mean": 4257.0, | |
| "valid_targets_min": 1666 | |
| }, | |
| { | |
| "epoch": 2.412935323383085, | |
| "grad_norm": 0.6083605097982535, | |
| "learning_rate": 3.316357921888104e-05, | |
| "loss": 0.2263, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2471916675567627, | |
| "step": 970, | |
| "valid_targets_mean": 3378.9, | |
| "valid_targets_min": 1597 | |
| }, | |
| { | |
| "epoch": 2.425373134328358, | |
| "grad_norm": 0.5335881223915271, | |
| "learning_rate": 3.306991480853624e-05, | |
| "loss": 0.227, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20531508326530457, | |
| "step": 975, | |
| "valid_targets_mean": 4332.2, | |
| "valid_targets_min": 1927 | |
| }, | |
| { | |
| "epoch": 2.4378109452736316, | |
| "grad_norm": 0.491874909775028, | |
| "learning_rate": 3.2975747379972345e-05, | |
| "loss": 0.2167, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21635055541992188, | |
| "step": 980, | |
| "valid_targets_mean": 4316.7, | |
| "valid_targets_min": 1860 | |
| }, | |
| { | |
| "epoch": 2.4502487562189055, | |
| "grad_norm": 0.5761426750570783, | |
| "learning_rate": 3.288108055738531e-05, | |
| "loss": 0.2217, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2470484972000122, | |
| "step": 985, | |
| "valid_targets_mean": 3830.1, | |
| "valid_targets_min": 1418 | |
| }, | |
| { | |
| "epoch": 2.4626865671641793, | |
| "grad_norm": 0.5320370757070529, | |
| "learning_rate": 3.278591798419112e-05, | |
| "loss": 0.2195, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2289409041404724, | |
| "step": 990, | |
| "valid_targets_mean": 4497.8, | |
| "valid_targets_min": 1957 | |
| }, | |
| { | |
| "epoch": 2.4751243781094527, | |
| "grad_norm": 0.5771119980643729, | |
| "learning_rate": 3.2690263322885564e-05, | |
| "loss": 0.2329, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23160231113433838, | |
| "step": 995, | |
| "valid_targets_mean": 3809.6, | |
| "valid_targets_min": 1732 | |
| }, | |
| { | |
| "epoch": 2.487562189054726, | |
| "grad_norm": 0.5428978241749973, | |
| "learning_rate": 3.259412025490331e-05, | |
| "loss": 0.2219, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22234313189983368, | |
| "step": 1000, | |
| "valid_targets_mean": 3825.9, | |
| "valid_targets_min": 2537 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 0.4988498596419294, | |
| "learning_rate": 3.249749248047619e-05, | |
| "loss": 0.2312, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2289588749408722, | |
| "step": 1005, | |
| "valid_targets_mean": 4525.5, | |
| "valid_targets_min": 2281 | |
| }, | |
| { | |
| "epoch": 2.512437810945274, | |
| "grad_norm": 0.5359144087715436, | |
| "learning_rate": 3.24003837184908e-05, | |
| "loss": 0.227, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.221797913312912, | |
| "step": 1010, | |
| "valid_targets_mean": 3942.2, | |
| "valid_targets_min": 1802 | |
| }, | |
| { | |
| "epoch": 2.5248756218905473, | |
| "grad_norm": 0.5568260899912753, | |
| "learning_rate": 3.230279770634538e-05, | |
| "loss": 0.2276, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21767503023147583, | |
| "step": 1015, | |
| "valid_targets_mean": 4057.7, | |
| "valid_targets_min": 2287 | |
| }, | |
| { | |
| "epoch": 2.5373134328358207, | |
| "grad_norm": 0.6004807280832435, | |
| "learning_rate": 3.220473819980594e-05, | |
| "loss": 0.2321, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23624536395072937, | |
| "step": 1020, | |
| "valid_targets_mean": 3945.8, | |
| "valid_targets_min": 1124 | |
| }, | |
| { | |
| "epoch": 2.5497512437810945, | |
| "grad_norm": 0.5323221469131362, | |
| "learning_rate": 3.2106208972861775e-05, | |
| "loss": 0.2309, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22079509496688843, | |
| "step": 1025, | |
| "valid_targets_mean": 4226.3, | |
| "valid_targets_min": 2281 | |
| }, | |
| { | |
| "epoch": 2.5621890547263684, | |
| "grad_norm": 0.5648978978257522, | |
| "learning_rate": 3.2007213817580165e-05, | |
| "loss": 0.2162, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22475269436836243, | |
| "step": 1030, | |
| "valid_targets_mean": 4073.6, | |
| "valid_targets_min": 2117 | |
| }, | |
| { | |
| "epoch": 2.574626865671642, | |
| "grad_norm": 0.5796358749507854, | |
| "learning_rate": 3.1907756543960425e-05, | |
| "loss": 0.2162, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23154105246067047, | |
| "step": 1035, | |
| "valid_targets_mean": 3477.8, | |
| "valid_targets_min": 1170 | |
| }, | |
| { | |
| "epoch": 2.587064676616915, | |
| "grad_norm": 0.5229192190104138, | |
| "learning_rate": 3.180784097978732e-05, | |
| "loss": 0.2195, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20573227107524872, | |
| "step": 1040, | |
| "valid_targets_mean": 4331.4, | |
| "valid_targets_min": 2708 | |
| }, | |
| { | |
| "epoch": 2.599502487562189, | |
| "grad_norm": 0.8210781641217509, | |
| "learning_rate": 3.1707470970483716e-05, | |
| "loss": 0.2203, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21882575750350952, | |
| "step": 1045, | |
| "valid_targets_mean": 4022.0, | |
| "valid_targets_min": 1396 | |
| }, | |
| { | |
| "epoch": 2.611940298507463, | |
| "grad_norm": 0.4797756242590163, | |
| "learning_rate": 3.160665037896256e-05, | |
| "loss": 0.2199, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21537092328071594, | |
| "step": 1050, | |
| "valid_targets_mean": 5050.7, | |
| "valid_targets_min": 2642 | |
| }, | |
| { | |
| "epoch": 2.6243781094527363, | |
| "grad_norm": 0.5768753494873544, | |
| "learning_rate": 3.150538308547826e-05, | |
| "loss": 0.2376, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2303762435913086, | |
| "step": 1055, | |
| "valid_targets_mean": 3295.4, | |
| "valid_targets_min": 840 | |
| }, | |
| { | |
| "epoch": 2.6368159203980097, | |
| "grad_norm": 0.5551715330405659, | |
| "learning_rate": 3.14036729874773e-05, | |
| "loss": 0.2281, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2432781457901001, | |
| "step": 1060, | |
| "valid_targets_mean": 3878.2, | |
| "valid_targets_min": 1155 | |
| }, | |
| { | |
| "epoch": 2.6492537313432836, | |
| "grad_norm": 0.5533747207127068, | |
| "learning_rate": 3.130152399944827e-05, | |
| "loss": 0.2247, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21668297052383423, | |
| "step": 1065, | |
| "valid_targets_mean": 3560.9, | |
| "valid_targets_min": 1236 | |
| }, | |
| { | |
| "epoch": 2.6616915422885574, | |
| "grad_norm": 0.6091887065700332, | |
| "learning_rate": 3.1198940052771196e-05, | |
| "loss": 0.2243, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22194230556488037, | |
| "step": 1070, | |
| "valid_targets_mean": 3523.8, | |
| "valid_targets_min": 1943 | |
| }, | |
| { | |
| "epoch": 2.674129353233831, | |
| "grad_norm": 0.5217859543197857, | |
| "learning_rate": 3.109592509556625e-05, | |
| "loss": 0.2219, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19868099689483643, | |
| "step": 1075, | |
| "valid_targets_mean": 3734.4, | |
| "valid_targets_min": 1771 | |
| }, | |
| { | |
| "epoch": 2.6865671641791042, | |
| "grad_norm": 0.5907560446630902, | |
| "learning_rate": 3.0992483092541757e-05, | |
| "loss": 0.2321, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22035574913024902, | |
| "step": 1080, | |
| "valid_targets_mean": 4186.8, | |
| "valid_targets_min": 1370 | |
| }, | |
| { | |
| "epoch": 2.699004975124378, | |
| "grad_norm": 0.5354879248422757, | |
| "learning_rate": 3.088861802484168e-05, | |
| "loss": 0.2178, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22888147830963135, | |
| "step": 1085, | |
| "valid_targets_mean": 4406.8, | |
| "valid_targets_min": 1937 | |
| }, | |
| { | |
| "epoch": 2.711442786069652, | |
| "grad_norm": 0.5354309571052726, | |
| "learning_rate": 3.078433388989232e-05, | |
| "loss": 0.225, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23584789037704468, | |
| "step": 1090, | |
| "valid_targets_mean": 4082.8, | |
| "valid_targets_min": 2016 | |
| }, | |
| { | |
| "epoch": 2.7238805970149254, | |
| "grad_norm": 0.5230105200073858, | |
| "learning_rate": 3.067963470124852e-05, | |
| "loss": 0.2187, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21761374175548553, | |
| "step": 1095, | |
| "valid_targets_mean": 4048.8, | |
| "valid_targets_min": 1515 | |
| }, | |
| { | |
| "epoch": 2.7363184079601988, | |
| "grad_norm": 0.5254789832854005, | |
| "learning_rate": 3.0574524488439166e-05, | |
| "loss": 0.2158, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21649402379989624, | |
| "step": 1100, | |
| "valid_targets_mean": 3900.8, | |
| "valid_targets_min": 1840 | |
| }, | |
| { | |
| "epoch": 2.7487562189054726, | |
| "grad_norm": 0.5512516143404952, | |
| "learning_rate": 3.046900729681215e-05, | |
| "loss": 0.2148, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2197253406047821, | |
| "step": 1105, | |
| "valid_targets_mean": 3859.6, | |
| "valid_targets_min": 1241 | |
| }, | |
| { | |
| "epoch": 2.7611940298507465, | |
| "grad_norm": 0.4780381184240064, | |
| "learning_rate": 3.0363087187378618e-05, | |
| "loss": 0.2167, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21492695808410645, | |
| "step": 1110, | |
| "valid_targets_mean": 4405.6, | |
| "valid_targets_min": 2322 | |
| }, | |
| { | |
| "epoch": 2.77363184079602, | |
| "grad_norm": 0.6383173460840905, | |
| "learning_rate": 3.025676823665671e-05, | |
| "loss": 0.2196, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23645994067192078, | |
| "step": 1115, | |
| "valid_targets_mean": 3986.8, | |
| "valid_targets_min": 2007 | |
| }, | |
| { | |
| "epoch": 2.7860696517412933, | |
| "grad_norm": 0.5606483903400612, | |
| "learning_rate": 3.0150054536514655e-05, | |
| "loss": 0.2268, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2141602337360382, | |
| "step": 1120, | |
| "valid_targets_mean": 3552.6, | |
| "valid_targets_min": 196 | |
| }, | |
| { | |
| "epoch": 2.798507462686567, | |
| "grad_norm": 0.5372938232844723, | |
| "learning_rate": 3.0042950194013313e-05, | |
| "loss": 0.2198, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22424790263175964, | |
| "step": 1125, | |
| "valid_targets_mean": 4286.3, | |
| "valid_targets_min": 2746 | |
| }, | |
| { | |
| "epoch": 2.810945273631841, | |
| "grad_norm": 0.647264121448103, | |
| "learning_rate": 2.993545933124807e-05, | |
| "loss": 0.2335, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2541235089302063, | |
| "step": 1130, | |
| "valid_targets_mean": 3984.4, | |
| "valid_targets_min": 1033 | |
| }, | |
| { | |
| "epoch": 2.8233830845771144, | |
| "grad_norm": 0.5395140967298383, | |
| "learning_rate": 2.9827586085190217e-05, | |
| "loss": 0.2251, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20689839124679565, | |
| "step": 1135, | |
| "valid_targets_mean": 3842.4, | |
| "valid_targets_min": 1140 | |
| }, | |
| { | |
| "epoch": 2.835820895522388, | |
| "grad_norm": 0.5132101606150065, | |
| "learning_rate": 2.971933460752773e-05, | |
| "loss": 0.2237, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2214384526014328, | |
| "step": 1140, | |
| "valid_targets_mean": 4829.9, | |
| "valid_targets_min": 1793 | |
| }, | |
| { | |
| "epoch": 2.8482587064676617, | |
| "grad_norm": 0.545837953147779, | |
| "learning_rate": 2.961070906450548e-05, | |
| "loss": 0.2229, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24275824427604675, | |
| "step": 1145, | |
| "valid_targets_mean": 4090.0, | |
| "valid_targets_min": 1876 | |
| }, | |
| { | |
| "epoch": 2.8606965174129355, | |
| "grad_norm": 0.545483447551859, | |
| "learning_rate": 2.950171363676488e-05, | |
| "loss": 0.2225, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21436379849910736, | |
| "step": 1150, | |
| "valid_targets_mean": 3576.0, | |
| "valid_targets_min": 1297 | |
| }, | |
| { | |
| "epoch": 2.873134328358209, | |
| "grad_norm": 0.5046264457103875, | |
| "learning_rate": 2.9392352519183003e-05, | |
| "loss": 0.2266, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2066916823387146, | |
| "step": 1155, | |
| "valid_targets_mean": 4762.8, | |
| "valid_targets_min": 2454 | |
| }, | |
| { | |
| "epoch": 2.8855721393034823, | |
| "grad_norm": 0.5186125948476298, | |
| "learning_rate": 2.928262992071113e-05, | |
| "loss": 0.2123, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2164272964000702, | |
| "step": 1160, | |
| "valid_targets_mean": 3770.9, | |
| "valid_targets_min": 1244 | |
| }, | |
| { | |
| "epoch": 2.898009950248756, | |
| "grad_norm": 0.5004644338207715, | |
| "learning_rate": 2.9172550064212747e-05, | |
| "loss": 0.2185, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1974021941423416, | |
| "step": 1165, | |
| "valid_targets_mean": 4147.8, | |
| "valid_targets_min": 2450 | |
| }, | |
| { | |
| "epoch": 2.91044776119403, | |
| "grad_norm": 0.5284009585202385, | |
| "learning_rate": 2.906211718630104e-05, | |
| "loss": 0.2219, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23019076883792877, | |
| "step": 1170, | |
| "valid_targets_mean": 3926.6, | |
| "valid_targets_min": 2084 | |
| }, | |
| { | |
| "epoch": 2.9228855721393034, | |
| "grad_norm": 0.5437530454710895, | |
| "learning_rate": 2.895133553717582e-05, | |
| "loss": 0.2186, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22214023768901825, | |
| "step": 1175, | |
| "valid_targets_mean": 3957.7, | |
| "valid_targets_min": 1130 | |
| }, | |
| { | |
| "epoch": 2.935323383084577, | |
| "grad_norm": 0.5309034940862213, | |
| "learning_rate": 2.8840209380459983e-05, | |
| "loss": 0.2174, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2027333378791809, | |
| "step": 1180, | |
| "valid_targets_mean": 3732.1, | |
| "valid_targets_min": 1670 | |
| }, | |
| { | |
| "epoch": 2.9477611940298507, | |
| "grad_norm": 0.502018444365329, | |
| "learning_rate": 2.8728742993035376e-05, | |
| "loss": 0.2162, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20462360978126526, | |
| "step": 1185, | |
| "valid_targets_mean": 3885.9, | |
| "valid_targets_min": 2023 | |
| }, | |
| { | |
| "epoch": 2.9601990049751246, | |
| "grad_norm": 0.5097178679874955, | |
| "learning_rate": 2.8616940664878217e-05, | |
| "loss": 0.2298, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21697774529457092, | |
| "step": 1190, | |
| "valid_targets_mean": 4445.0, | |
| "valid_targets_min": 1363 | |
| }, | |
| { | |
| "epoch": 2.972636815920398, | |
| "grad_norm": 0.568255587186467, | |
| "learning_rate": 2.850480669889397e-05, | |
| "loss": 0.2346, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2217128425836563, | |
| "step": 1195, | |
| "valid_targets_mean": 3645.9, | |
| "valid_targets_min": 2013 | |
| }, | |
| { | |
| "epoch": 2.9850746268656714, | |
| "grad_norm": 0.5435581128114583, | |
| "learning_rate": 2.839234541075178e-05, | |
| "loss": 0.2137, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23033751547336578, | |
| "step": 1200, | |
| "valid_targets_mean": 3913.1, | |
| "valid_targets_min": 428 | |
| }, | |
| { | |
| "epoch": 2.9975124378109452, | |
| "grad_norm": 0.5344645879564283, | |
| "learning_rate": 2.8279561128718324e-05, | |
| "loss": 0.227, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1959632784128189, | |
| "step": 1205, | |
| "valid_targets_mean": 3608.3, | |
| "valid_targets_min": 2089 | |
| }, | |
| { | |
| "epoch": 3.009950248756219, | |
| "grad_norm": 0.49623627025746214, | |
| "learning_rate": 2.8166458193491287e-05, | |
| "loss": 0.2089, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20732742547988892, | |
| "step": 1210, | |
| "valid_targets_mean": 4868.1, | |
| "valid_targets_min": 2028 | |
| }, | |
| { | |
| "epoch": 3.0223880597014925, | |
| "grad_norm": 0.5737814175353435, | |
| "learning_rate": 2.805304095803222e-05, | |
| "loss": 0.2037, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21487167477607727, | |
| "step": 1215, | |
| "valid_targets_mean": 4191.0, | |
| "valid_targets_min": 1524 | |
| }, | |
| { | |
| "epoch": 3.0348258706467663, | |
| "grad_norm": 0.5106023876957765, | |
| "learning_rate": 2.7939313787399118e-05, | |
| "loss": 0.2221, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2161100208759308, | |
| "step": 1220, | |
| "valid_targets_mean": 4569.1, | |
| "valid_targets_min": 2916 | |
| }, | |
| { | |
| "epoch": 3.0472636815920398, | |
| "grad_norm": 0.5564896880436795, | |
| "learning_rate": 2.7825281058578326e-05, | |
| "loss": 0.2061, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2229604572057724, | |
| "step": 1225, | |
| "valid_targets_mean": 4257.8, | |
| "valid_targets_min": 1109 | |
| }, | |
| { | |
| "epoch": 3.0597014925373136, | |
| "grad_norm": 0.557147418201146, | |
| "learning_rate": 2.7710947160316145e-05, | |
| "loss": 0.2019, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20052112638950348, | |
| "step": 1230, | |
| "valid_targets_mean": 4028.7, | |
| "valid_targets_min": 1645 | |
| }, | |
| { | |
| "epoch": 3.072139303482587, | |
| "grad_norm": 0.5878465938206581, | |
| "learning_rate": 2.759631649294989e-05, | |
| "loss": 0.2057, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22095069289207458, | |
| "step": 1235, | |
| "valid_targets_mean": 4023.8, | |
| "valid_targets_min": 1998 | |
| }, | |
| { | |
| "epoch": 3.084577114427861, | |
| "grad_norm": 0.5194814837667161, | |
| "learning_rate": 2.7481393468238558e-05, | |
| "loss": 0.2008, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19190478324890137, | |
| "step": 1240, | |
| "valid_targets_mean": 4377.2, | |
| "valid_targets_min": 2378 | |
| }, | |
| { | |
| "epoch": 3.0970149253731343, | |
| "grad_norm": 0.5898646554241795, | |
| "learning_rate": 2.7366182509193e-05, | |
| "loss": 0.2065, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20672670006752014, | |
| "step": 1245, | |
| "valid_targets_mean": 3683.2, | |
| "valid_targets_min": 1530 | |
| }, | |
| { | |
| "epoch": 3.109452736318408, | |
| "grad_norm": 0.5408382733969814, | |
| "learning_rate": 2.725068804990575e-05, | |
| "loss": 0.2145, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22049081325531006, | |
| "step": 1250, | |
| "valid_targets_mean": 4423.6, | |
| "valid_targets_min": 1725 | |
| }, | |
| { | |
| "epoch": 3.1218905472636815, | |
| "grad_norm": 0.5240915497470742, | |
| "learning_rate": 2.7134914535380305e-05, | |
| "loss": 0.2081, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19481924176216125, | |
| "step": 1255, | |
| "valid_targets_mean": 4028.4, | |
| "valid_targets_min": 831 | |
| }, | |
| { | |
| "epoch": 3.1343283582089554, | |
| "grad_norm": 0.586082749576896, | |
| "learning_rate": 2.7018866421360114e-05, | |
| "loss": 0.2181, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22566986083984375, | |
| "step": 1260, | |
| "valid_targets_mean": 3726.2, | |
| "valid_targets_min": 844 | |
| }, | |
| { | |
| "epoch": 3.146766169154229, | |
| "grad_norm": 0.5224048615979382, | |
| "learning_rate": 2.6902548174157028e-05, | |
| "loss": 0.2063, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.219328373670578, | |
| "step": 1265, | |
| "valid_targets_mean": 4112.4, | |
| "valid_targets_min": 1498 | |
| }, | |
| { | |
| "epoch": 3.1592039800995027, | |
| "grad_norm": 0.501751299595383, | |
| "learning_rate": 2.6785964270479472e-05, | |
| "loss": 0.2027, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1844957172870636, | |
| "step": 1270, | |
| "valid_targets_mean": 4384.2, | |
| "valid_targets_min": 2813 | |
| }, | |
| { | |
| "epoch": 3.171641791044776, | |
| "grad_norm": 0.5831574633686978, | |
| "learning_rate": 2.66691191972601e-05, | |
| "loss": 0.2103, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2090991735458374, | |
| "step": 1275, | |
| "valid_targets_mean": 3494.6, | |
| "valid_targets_min": 1780 | |
| }, | |
| { | |
| "epoch": 3.18407960199005, | |
| "grad_norm": 0.5850385454163992, | |
| "learning_rate": 2.6552017451483136e-05, | |
| "loss": 0.2028, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21853281557559967, | |
| "step": 1280, | |
| "valid_targets_mean": 4045.5, | |
| "valid_targets_min": 1823 | |
| }, | |
| { | |
| "epoch": 3.1965174129353233, | |
| "grad_norm": 0.5599684535438738, | |
| "learning_rate": 2.6434663540011278e-05, | |
| "loss": 0.2087, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.219620943069458, | |
| "step": 1285, | |
| "valid_targets_mean": 4112.5, | |
| "valid_targets_min": 2313 | |
| }, | |
| { | |
| "epoch": 3.208955223880597, | |
| "grad_norm": 0.9513235022639203, | |
| "learning_rate": 2.631706197941227e-05, | |
| "loss": 0.2063, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1958315670490265, | |
| "step": 1290, | |
| "valid_targets_mean": 4007.0, | |
| "valid_targets_min": 1381 | |
| }, | |
| { | |
| "epoch": 3.2213930348258706, | |
| "grad_norm": 0.548449991410438, | |
| "learning_rate": 2.619921729578504e-05, | |
| "loss": 0.2016, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20220553874969482, | |
| "step": 1295, | |
| "valid_targets_mean": 3707.1, | |
| "valid_targets_min": 1997 | |
| }, | |
| { | |
| "epoch": 3.2338308457711444, | |
| "grad_norm": 0.5435395454910702, | |
| "learning_rate": 2.6081134024585558e-05, | |
| "loss": 0.2018, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19983190298080444, | |
| "step": 1300, | |
| "valid_targets_mean": 3682.4, | |
| "valid_targets_min": 1533 | |
| }, | |
| { | |
| "epoch": 3.246268656716418, | |
| "grad_norm": 0.5396400851922742, | |
| "learning_rate": 2.5962816710452217e-05, | |
| "loss": 0.2086, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19172373414039612, | |
| "step": 1305, | |
| "valid_targets_mean": 4202.8, | |
| "valid_targets_min": 1828 | |
| }, | |
| { | |
| "epoch": 3.2587064676616917, | |
| "grad_norm": 0.5528015303255912, | |
| "learning_rate": 2.5844269907030972e-05, | |
| "loss": 0.2008, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21026234328746796, | |
| "step": 1310, | |
| "valid_targets_mean": 3956.2, | |
| "valid_targets_min": 2237 | |
| }, | |
| { | |
| "epoch": 3.271144278606965, | |
| "grad_norm": 0.6555416969802464, | |
| "learning_rate": 2.5725498176800053e-05, | |
| "loss": 0.2059, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20781803131103516, | |
| "step": 1315, | |
| "valid_targets_mean": 3484.7, | |
| "valid_targets_min": 1823 | |
| }, | |
| { | |
| "epoch": 3.283582089552239, | |
| "grad_norm": 0.5520065160976478, | |
| "learning_rate": 2.560650609089441e-05, | |
| "loss": 0.2113, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21545690298080444, | |
| "step": 1320, | |
| "valid_targets_mean": 3796.1, | |
| "valid_targets_min": 2150 | |
| }, | |
| { | |
| "epoch": 3.2960199004975124, | |
| "grad_norm": 0.5594356265872277, | |
| "learning_rate": 2.5487298228929746e-05, | |
| "loss": 0.1982, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19716203212738037, | |
| "step": 1325, | |
| "valid_targets_mean": 4177.7, | |
| "valid_targets_min": 2666 | |
| }, | |
| { | |
| "epoch": 3.308457711442786, | |
| "grad_norm": 0.5673243925575643, | |
| "learning_rate": 2.5367879178826278e-05, | |
| "loss": 0.2106, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21999700367450714, | |
| "step": 1330, | |
| "valid_targets_mean": 3577.8, | |
| "valid_targets_min": 1379 | |
| }, | |
| { | |
| "epoch": 3.3208955223880596, | |
| "grad_norm": 0.5112451211401966, | |
| "learning_rate": 2.5248253536632176e-05, | |
| "loss": 0.2055, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20441888272762299, | |
| "step": 1335, | |
| "valid_targets_mean": 4252.4, | |
| "valid_targets_min": 1758 | |
| }, | |
| { | |
| "epoch": 3.3333333333333335, | |
| "grad_norm": 0.5701315285018725, | |
| "learning_rate": 2.512842590634664e-05, | |
| "loss": 0.2116, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.228561133146286, | |
| "step": 1340, | |
| "valid_targets_mean": 3937.6, | |
| "valid_targets_min": 1865 | |
| }, | |
| { | |
| "epoch": 3.345771144278607, | |
| "grad_norm": 0.5421666508199918, | |
| "learning_rate": 2.5008400899742757e-05, | |
| "loss": 0.1953, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20633219182491302, | |
| "step": 1345, | |
| "valid_targets_mean": 4439.9, | |
| "valid_targets_min": 3260 | |
| }, | |
| { | |
| "epoch": 3.3582089552238807, | |
| "grad_norm": 0.49279685109536914, | |
| "learning_rate": 2.4888183136189952e-05, | |
| "loss": 0.2082, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1922161877155304, | |
| "step": 1350, | |
| "valid_targets_mean": 4147.8, | |
| "valid_targets_min": 2538 | |
| }, | |
| { | |
| "epoch": 3.370646766169154, | |
| "grad_norm": 0.4688762074597621, | |
| "learning_rate": 2.4767777242476263e-05, | |
| "loss": 0.1997, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19103196263313293, | |
| "step": 1355, | |
| "valid_targets_mean": 5005.8, | |
| "valid_targets_min": 2533 | |
| }, | |
| { | |
| "epoch": 3.383084577114428, | |
| "grad_norm": 0.5500017320569016, | |
| "learning_rate": 2.4647187852630227e-05, | |
| "loss": 0.2028, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20763391256332397, | |
| "step": 1360, | |
| "valid_targets_mean": 3853.6, | |
| "valid_targets_min": 1548 | |
| }, | |
| { | |
| "epoch": 3.3955223880597014, | |
| "grad_norm": 0.5872147896176038, | |
| "learning_rate": 2.4526419607742543e-05, | |
| "loss": 0.2044, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22904819250106812, | |
| "step": 1365, | |
| "valid_targets_mean": 3977.2, | |
| "valid_targets_min": 2209 | |
| }, | |
| { | |
| "epoch": 3.4079601990049753, | |
| "grad_norm": 0.5155674931487689, | |
| "learning_rate": 2.4405477155787457e-05, | |
| "loss": 0.2084, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21794819831848145, | |
| "step": 1370, | |
| "valid_targets_mean": 4300.1, | |
| "valid_targets_min": 1567 | |
| }, | |
| { | |
| "epoch": 3.4203980099502487, | |
| "grad_norm": 0.5958084420992633, | |
| "learning_rate": 2.4284365151443892e-05, | |
| "loss": 0.2095, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21897193789482117, | |
| "step": 1375, | |
| "valid_targets_mean": 3848.8, | |
| "valid_targets_min": 1201 | |
| }, | |
| { | |
| "epoch": 3.4328358208955225, | |
| "grad_norm": 0.5269624601178255, | |
| "learning_rate": 2.416308825591626e-05, | |
| "loss": 0.2018, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1867186725139618, | |
| "step": 1380, | |
| "valid_targets_mean": 4206.9, | |
| "valid_targets_min": 1330 | |
| }, | |
| { | |
| "epoch": 3.445273631840796, | |
| "grad_norm": 0.5745335400755156, | |
| "learning_rate": 2.4041651136755112e-05, | |
| "loss": 0.208, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21894606947898865, | |
| "step": 1385, | |
| "valid_targets_mean": 4212.0, | |
| "valid_targets_min": 1793 | |
| }, | |
| { | |
| "epoch": 3.45771144278607, | |
| "grad_norm": 0.5062989296925714, | |
| "learning_rate": 2.3920058467677475e-05, | |
| "loss": 0.2, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19788777828216553, | |
| "step": 1390, | |
| "valid_targets_mean": 4500.6, | |
| "valid_targets_min": 1771 | |
| }, | |
| { | |
| "epoch": 3.470149253731343, | |
| "grad_norm": 0.5291231334084273, | |
| "learning_rate": 2.3798314928386986e-05, | |
| "loss": 0.207, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19889307022094727, | |
| "step": 1395, | |
| "valid_targets_mean": 4059.6, | |
| "valid_targets_min": 1629 | |
| }, | |
| { | |
| "epoch": 3.482587064676617, | |
| "grad_norm": 0.6204408757812566, | |
| "learning_rate": 2.367642520439378e-05, | |
| "loss": 0.2072, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2038464993238449, | |
| "step": 1400, | |
| "valid_targets_mean": 3189.2, | |
| "valid_targets_min": 941 | |
| }, | |
| { | |
| "epoch": 3.4950248756218905, | |
| "grad_norm": 0.5860309965160757, | |
| "learning_rate": 2.3554393986834173e-05, | |
| "loss": 0.2109, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18992196023464203, | |
| "step": 1405, | |
| "valid_targets_mean": 4013.2, | |
| "valid_targets_min": 777 | |
| }, | |
| { | |
| "epoch": 3.5074626865671643, | |
| "grad_norm": 0.5323469940034676, | |
| "learning_rate": 2.3432225972290086e-05, | |
| "loss": 0.1953, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1906590759754181, | |
| "step": 1410, | |
| "valid_targets_mean": 4144.9, | |
| "valid_targets_min": 1661 | |
| }, | |
| { | |
| "epoch": 3.5199004975124377, | |
| "grad_norm": 0.5720751778006824, | |
| "learning_rate": 2.3309925862608318e-05, | |
| "loss": 0.204, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1970103681087494, | |
| "step": 1415, | |
| "valid_targets_mean": 3570.9, | |
| "valid_targets_min": 1244 | |
| }, | |
| { | |
| "epoch": 3.5323383084577116, | |
| "grad_norm": 0.5077487997895391, | |
| "learning_rate": 2.3187498364719576e-05, | |
| "loss": 0.2041, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1936759650707245, | |
| "step": 1420, | |
| "valid_targets_mean": 4038.2, | |
| "valid_targets_min": 2313 | |
| }, | |
| { | |
| "epoch": 3.544776119402985, | |
| "grad_norm": 0.5828341436838554, | |
| "learning_rate": 2.3064948190457335e-05, | |
| "loss": 0.2046, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18977367877960205, | |
| "step": 1425, | |
| "valid_targets_mean": 3396.9, | |
| "valid_targets_min": 2106 | |
| }, | |
| { | |
| "epoch": 3.557213930348259, | |
| "grad_norm": 0.5735838721354387, | |
| "learning_rate": 2.2942280056376457e-05, | |
| "loss": 0.2033, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20034313201904297, | |
| "step": 1430, | |
| "valid_targets_mean": 3937.6, | |
| "valid_targets_min": 1627 | |
| }, | |
| { | |
| "epoch": 3.5696517412935322, | |
| "grad_norm": 0.5234257291474523, | |
| "learning_rate": 2.2819498683571718e-05, | |
| "loss": 0.2021, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20040658116340637, | |
| "step": 1435, | |
| "valid_targets_mean": 4346.8, | |
| "valid_targets_min": 2323 | |
| }, | |
| { | |
| "epoch": 3.582089552238806, | |
| "grad_norm": 0.5271233727171688, | |
| "learning_rate": 2.2696608797496045e-05, | |
| "loss": 0.2132, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1866839975118637, | |
| "step": 1440, | |
| "valid_targets_mean": 3945.6, | |
| "valid_targets_min": 1378 | |
| }, | |
| { | |
| "epoch": 3.5945273631840795, | |
| "grad_norm": 0.5242920703023999, | |
| "learning_rate": 2.2573615127778733e-05, | |
| "loss": 0.1984, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19095249474048615, | |
| "step": 1445, | |
| "valid_targets_mean": 3907.6, | |
| "valid_targets_min": 1780 | |
| }, | |
| { | |
| "epoch": 3.6069651741293534, | |
| "grad_norm": 0.6005933256936844, | |
| "learning_rate": 2.2450522408043324e-05, | |
| "loss": 0.2128, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2235599309206009, | |
| "step": 1450, | |
| "valid_targets_mean": 3177.5, | |
| "valid_targets_min": 992 | |
| }, | |
| { | |
| "epoch": 3.6194029850746268, | |
| "grad_norm": 0.5536958757152657, | |
| "learning_rate": 2.232733537572551e-05, | |
| "loss": 0.2009, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20974500477313995, | |
| "step": 1455, | |
| "valid_targets_mean": 3999.3, | |
| "valid_targets_min": 1643 | |
| }, | |
| { | |
| "epoch": 3.6318407960199006, | |
| "grad_norm": 0.45147835839349537, | |
| "learning_rate": 2.2204058771890735e-05, | |
| "loss": 0.2029, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19078528881072998, | |
| "step": 1460, | |
| "valid_targets_mean": 4792.8, | |
| "valid_targets_min": 1685 | |
| }, | |
| { | |
| "epoch": 3.644278606965174, | |
| "grad_norm": 0.6125570787690607, | |
| "learning_rate": 2.2080697341051777e-05, | |
| "loss": 0.2091, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21356597542762756, | |
| "step": 1465, | |
| "valid_targets_mean": 3559.8, | |
| "valid_targets_min": 2038 | |
| }, | |
| { | |
| "epoch": 3.656716417910448, | |
| "grad_norm": 0.5118359172048365, | |
| "learning_rate": 2.195725583098611e-05, | |
| "loss": 0.2055, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20302622020244598, | |
| "step": 1470, | |
| "valid_targets_mean": 4243.6, | |
| "valid_targets_min": 2620 | |
| }, | |
| { | |
| "epoch": 3.6691542288557213, | |
| "grad_norm": 0.5524385866849025, | |
| "learning_rate": 2.183373899255321e-05, | |
| "loss": 0.2006, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1972339153289795, | |
| "step": 1475, | |
| "valid_targets_mean": 3786.9, | |
| "valid_targets_min": 2602 | |
| }, | |
| { | |
| "epoch": 3.681592039800995, | |
| "grad_norm": 0.5519280039906248, | |
| "learning_rate": 2.171015157951169e-05, | |
| "loss": 0.2127, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23621603846549988, | |
| "step": 1480, | |
| "valid_targets_mean": 4710.5, | |
| "valid_targets_min": 2121 | |
| }, | |
| { | |
| "epoch": 3.6940298507462686, | |
| "grad_norm": 0.4961853443443449, | |
| "learning_rate": 2.158649834833636e-05, | |
| "loss": 0.2006, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21436795592308044, | |
| "step": 1485, | |
| "valid_targets_mean": 4526.8, | |
| "valid_targets_min": 2490 | |
| }, | |
| { | |
| "epoch": 3.7064676616915424, | |
| "grad_norm": 0.5544971340683077, | |
| "learning_rate": 2.146278405803512e-05, | |
| "loss": 0.2068, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20669138431549072, | |
| "step": 1490, | |
| "valid_targets_mean": 3391.8, | |
| "valid_targets_min": 228 | |
| }, | |
| { | |
| "epoch": 3.718905472636816, | |
| "grad_norm": 0.5059854792152241, | |
| "learning_rate": 2.133901346996588e-05, | |
| "loss": 0.2037, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2086849957704544, | |
| "step": 1495, | |
| "valid_targets_mean": 4446.4, | |
| "valid_targets_min": 2480 | |
| }, | |
| { | |
| "epoch": 3.7313432835820897, | |
| "grad_norm": 0.5008031450948552, | |
| "learning_rate": 2.1215191347653227e-05, | |
| "loss": 0.2011, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20051947236061096, | |
| "step": 1500, | |
| "valid_targets_mean": 4183.6, | |
| "valid_targets_min": 1422 | |
| }, | |
| { | |
| "epoch": 3.743781094527363, | |
| "grad_norm": 0.5016740412477303, | |
| "learning_rate": 2.109132245660517e-05, | |
| "loss": 0.2046, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2014276683330536, | |
| "step": 1505, | |
| "valid_targets_mean": 4293.1, | |
| "valid_targets_min": 2065 | |
| }, | |
| { | |
| "epoch": 3.756218905472637, | |
| "grad_norm": 0.5773401034416584, | |
| "learning_rate": 2.096741156412967e-05, | |
| "loss": 0.2171, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24956290423870087, | |
| "step": 1510, | |
| "valid_targets_mean": 4016.8, | |
| "valid_targets_min": 2328 | |
| }, | |
| { | |
| "epoch": 3.7686567164179103, | |
| "grad_norm": 0.5557509017649421, | |
| "learning_rate": 2.084346343915118e-05, | |
| "loss": 0.2061, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21007095277309418, | |
| "step": 1515, | |
| "valid_targets_mean": 4184.1, | |
| "valid_targets_min": 2042 | |
| }, | |
| { | |
| "epoch": 3.781094527363184, | |
| "grad_norm": 0.5069995725105243, | |
| "learning_rate": 2.0719482852027122e-05, | |
| "loss": 0.1948, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19801217317581177, | |
| "step": 1520, | |
| "valid_targets_mean": 4527.2, | |
| "valid_targets_min": 2586 | |
| }, | |
| { | |
| "epoch": 3.7935323383084576, | |
| "grad_norm": 0.5072359725349612, | |
| "learning_rate": 2.059547457436429e-05, | |
| "loss": 0.2089, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21030068397521973, | |
| "step": 1525, | |
| "valid_targets_mean": 4119.2, | |
| "valid_targets_min": 1876 | |
| }, | |
| { | |
| "epoch": 3.8059701492537314, | |
| "grad_norm": 0.5809162860083377, | |
| "learning_rate": 2.0471443378835173e-05, | |
| "loss": 0.2037, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20429758727550507, | |
| "step": 1530, | |
| "valid_targets_mean": 3375.6, | |
| "valid_targets_min": 423 | |
| }, | |
| { | |
| "epoch": 3.818407960199005, | |
| "grad_norm": 0.6146168591855865, | |
| "learning_rate": 2.0347394038994305e-05, | |
| "loss": 0.1966, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20681461691856384, | |
| "step": 1535, | |
| "valid_targets_mean": 3487.4, | |
| "valid_targets_min": 1170 | |
| }, | |
| { | |
| "epoch": 3.8308457711442787, | |
| "grad_norm": 0.5243852804360046, | |
| "learning_rate": 2.0223331329094534e-05, | |
| "loss": 0.2002, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20334434509277344, | |
| "step": 1540, | |
| "valid_targets_mean": 3998.9, | |
| "valid_targets_min": 1988 | |
| }, | |
| { | |
| "epoch": 3.843283582089552, | |
| "grad_norm": 0.5291633419029611, | |
| "learning_rate": 2.0099260023903286e-05, | |
| "loss": 0.211, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18365240097045898, | |
| "step": 1545, | |
| "valid_targets_mean": 4010.1, | |
| "valid_targets_min": 1927 | |
| }, | |
| { | |
| "epoch": 3.855721393034826, | |
| "grad_norm": 0.5121951623491455, | |
| "learning_rate": 1.997518489851878e-05, | |
| "loss": 0.2021, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20143933594226837, | |
| "step": 1550, | |
| "valid_targets_mean": 4428.3, | |
| "valid_targets_min": 2098 | |
| }, | |
| { | |
| "epoch": 3.8681592039800994, | |
| "grad_norm": 0.5327816071630699, | |
| "learning_rate": 1.985111072818626e-05, | |
| "loss": 0.2111, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2264530062675476, | |
| "step": 1555, | |
| "valid_targets_mean": 4157.4, | |
| "valid_targets_min": 1472 | |
| }, | |
| { | |
| "epoch": 3.8805970149253732, | |
| "grad_norm": 0.5872325233516333, | |
| "learning_rate": 1.9727042288114223e-05, | |
| "loss": 0.2003, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21333108842372894, | |
| "step": 1560, | |
| "valid_targets_mean": 3713.8, | |
| "valid_targets_min": 1874 | |
| }, | |
| { | |
| "epoch": 3.8930348258706466, | |
| "grad_norm": 0.5465903196708187, | |
| "learning_rate": 1.9602984353290627e-05, | |
| "loss": 0.2041, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20426753163337708, | |
| "step": 1565, | |
| "valid_targets_mean": 3976.4, | |
| "valid_targets_min": 1752 | |
| }, | |
| { | |
| "epoch": 3.9054726368159205, | |
| "grad_norm": 0.48170818915584446, | |
| "learning_rate": 1.9478941698299108e-05, | |
| "loss": 0.2018, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19036681950092316, | |
| "step": 1570, | |
| "valid_targets_mean": 5013.6, | |
| "valid_targets_min": 1987 | |
| }, | |
| { | |
| "epoch": 3.917910447761194, | |
| "grad_norm": 0.5470516146085361, | |
| "learning_rate": 1.9354919097135233e-05, | |
| "loss": 0.2033, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20237231254577637, | |
| "step": 1575, | |
| "valid_targets_mean": 3921.6, | |
| "valid_targets_min": 2194 | |
| }, | |
| { | |
| "epoch": 3.9303482587064678, | |
| "grad_norm": 0.49608737593410446, | |
| "learning_rate": 1.9230921323022777e-05, | |
| "loss": 0.2004, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19408315420150757, | |
| "step": 1580, | |
| "valid_targets_mean": 4462.3, | |
| "valid_targets_min": 2876 | |
| }, | |
| { | |
| "epoch": 3.942786069651741, | |
| "grad_norm": 0.5215118271002951, | |
| "learning_rate": 1.9106953148229986e-05, | |
| "loss": 0.2063, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20697835087776184, | |
| "step": 1585, | |
| "valid_targets_mean": 4175.0, | |
| "valid_targets_min": 2020 | |
| }, | |
| { | |
| "epoch": 3.955223880597015, | |
| "grad_norm": 0.5461860904207121, | |
| "learning_rate": 1.8983019343885937e-05, | |
| "loss": 0.2003, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2104094922542572, | |
| "step": 1590, | |
| "valid_targets_mean": 3759.0, | |
| "valid_targets_min": 1717 | |
| }, | |
| { | |
| "epoch": 3.9676616915422884, | |
| "grad_norm": 0.5231042143924045, | |
| "learning_rate": 1.8859124679796893e-05, | |
| "loss": 0.2049, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21831703186035156, | |
| "step": 1595, | |
| "valid_targets_mean": 4261.8, | |
| "valid_targets_min": 2399 | |
| }, | |
| { | |
| "epoch": 3.9800995024875623, | |
| "grad_norm": 0.5222686881020943, | |
| "learning_rate": 1.8735273924262727e-05, | |
| "loss": 0.2038, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2033132016658783, | |
| "step": 1600, | |
| "valid_targets_mean": 4387.1, | |
| "valid_targets_min": 2804 | |
| }, | |
| { | |
| "epoch": 3.9925373134328357, | |
| "grad_norm": 0.6234021033332222, | |
| "learning_rate": 1.8611471843893447e-05, | |
| "loss": 0.2024, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19351506233215332, | |
| "step": 1605, | |
| "valid_targets_mean": 3928.3, | |
| "valid_targets_min": 1828 | |
| }, | |
| { | |
| "epoch": 4.0049751243781095, | |
| "grad_norm": 0.5228099382124228, | |
| "learning_rate": 1.848772320342568e-05, | |
| "loss": 0.1936, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1864427924156189, | |
| "step": 1610, | |
| "valid_targets_mean": 4102.9, | |
| "valid_targets_min": 2681 | |
| }, | |
| { | |
| "epoch": 4.017412935323383, | |
| "grad_norm": 0.5441470808752659, | |
| "learning_rate": 1.8364032765539355e-05, | |
| "loss": 0.2005, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2033585011959076, | |
| "step": 1615, | |
| "valid_targets_mean": 4462.2, | |
| "valid_targets_min": 2005 | |
| }, | |
| { | |
| "epoch": 4.029850746268656, | |
| "grad_norm": 0.5740596060113697, | |
| "learning_rate": 1.8240405290674348e-05, | |
| "loss": 0.1899, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1972273290157318, | |
| "step": 1620, | |
| "valid_targets_mean": 3756.9, | |
| "valid_targets_min": 1255 | |
| }, | |
| { | |
| "epoch": 4.04228855721393, | |
| "grad_norm": 0.5719711729617718, | |
| "learning_rate": 1.8116845536847306e-05, | |
| "loss": 0.1935, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19884632527828217, | |
| "step": 1625, | |
| "valid_targets_mean": 4049.6, | |
| "valid_targets_min": 374 | |
| }, | |
| { | |
| "epoch": 4.054726368159204, | |
| "grad_norm": 0.4974824417143702, | |
| "learning_rate": 1.799335825946853e-05, | |
| "loss": 0.1904, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18048778176307678, | |
| "step": 1630, | |
| "valid_targets_mean": 4554.6, | |
| "valid_targets_min": 2925 | |
| }, | |
| { | |
| "epoch": 4.067164179104478, | |
| "grad_norm": 0.544654409825013, | |
| "learning_rate": 1.7869948211158898e-05, | |
| "loss": 0.1883, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17982539534568787, | |
| "step": 1635, | |
| "valid_targets_mean": 4247.0, | |
| "valid_targets_min": 1548 | |
| }, | |
| { | |
| "epoch": 4.079601990049751, | |
| "grad_norm": 0.5337906477339353, | |
| "learning_rate": 1.774662014156705e-05, | |
| "loss": 0.1874, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1838865578174591, | |
| "step": 1640, | |
| "valid_targets_mean": 3586.6, | |
| "valid_targets_min": 1801 | |
| }, | |
| { | |
| "epoch": 4.092039800995025, | |
| "grad_norm": 0.5334091034891687, | |
| "learning_rate": 1.762337879718649e-05, | |
| "loss": 0.1908, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20184041559696198, | |
| "step": 1645, | |
| "valid_targets_mean": 4037.2, | |
| "valid_targets_min": 2085 | |
| }, | |
| { | |
| "epoch": 4.104477611940299, | |
| "grad_norm": 0.48859337981552636, | |
| "learning_rate": 1.750022892117296e-05, | |
| "loss": 0.1844, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17627070844173431, | |
| "step": 1650, | |
| "valid_targets_mean": 4640.4, | |
| "valid_targets_min": 2534 | |
| }, | |
| { | |
| "epoch": 4.116915422885572, | |
| "grad_norm": 0.5661329566773484, | |
| "learning_rate": 1.7377175253161907e-05, | |
| "loss": 0.1851, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17362037301063538, | |
| "step": 1655, | |
| "valid_targets_mean": 4436.4, | |
| "valid_targets_min": 2446 | |
| }, | |
| { | |
| "epoch": 4.129353233830845, | |
| "grad_norm": 0.5703288632207718, | |
| "learning_rate": 1.7254222529086024e-05, | |
| "loss": 0.187, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19699379801750183, | |
| "step": 1660, | |
| "valid_targets_mean": 4017.1, | |
| "valid_targets_min": 2457 | |
| }, | |
| { | |
| "epoch": 4.141791044776119, | |
| "grad_norm": 0.681732512062585, | |
| "learning_rate": 1.7131375480993014e-05, | |
| "loss": 0.1842, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18208718299865723, | |
| "step": 1665, | |
| "valid_targets_mean": 4043.7, | |
| "valid_targets_min": 960 | |
| }, | |
| { | |
| "epoch": 4.154228855721393, | |
| "grad_norm": 0.4723457137036097, | |
| "learning_rate": 1.7008638836863455e-05, | |
| "loss": 0.1843, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17454874515533447, | |
| "step": 1670, | |
| "valid_targets_mean": 4918.2, | |
| "valid_targets_min": 2042 | |
| }, | |
| { | |
| "epoch": 4.166666666666667, | |
| "grad_norm": 0.5581086668881344, | |
| "learning_rate": 1.6886017320428817e-05, | |
| "loss": 0.1838, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18187645077705383, | |
| "step": 1675, | |
| "valid_targets_mean": 3673.4, | |
| "valid_targets_min": 2213 | |
| }, | |
| { | |
| "epoch": 4.17910447761194, | |
| "grad_norm": 0.6292356323245095, | |
| "learning_rate": 1.676351565098973e-05, | |
| "loss": 0.1842, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19597412645816803, | |
| "step": 1680, | |
| "valid_targets_mean": 3223.8, | |
| "valid_targets_min": 1178 | |
| }, | |
| { | |
| "epoch": 4.191542288557214, | |
| "grad_norm": 0.5771443261900594, | |
| "learning_rate": 1.6641138543234253e-05, | |
| "loss": 0.2058, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23651202023029327, | |
| "step": 1685, | |
| "valid_targets_mean": 3848.0, | |
| "valid_targets_min": 443 | |
| }, | |
| { | |
| "epoch": 4.203980099502488, | |
| "grad_norm": 0.5522616677962544, | |
| "learning_rate": 1.6518890707056522e-05, | |
| "loss": 0.1802, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1811617612838745, | |
| "step": 1690, | |
| "valid_targets_mean": 4084.1, | |
| "valid_targets_min": 1696 | |
| }, | |
| { | |
| "epoch": 4.2164179104477615, | |
| "grad_norm": 0.5796056383907187, | |
| "learning_rate": 1.639677684737539e-05, | |
| "loss": 0.1924, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18784770369529724, | |
| "step": 1695, | |
| "valid_targets_mean": 3328.5, | |
| "valid_targets_min": 1507 | |
| }, | |
| { | |
| "epoch": 4.2288557213930345, | |
| "grad_norm": 0.622905968717353, | |
| "learning_rate": 1.6274801663953415e-05, | |
| "loss": 0.1836, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1970984935760498, | |
| "step": 1700, | |
| "valid_targets_mean": 3623.7, | |
| "valid_targets_min": 1520 | |
| }, | |
| { | |
| "epoch": 4.241293532338308, | |
| "grad_norm": 0.5349694808917531, | |
| "learning_rate": 1.6152969851215966e-05, | |
| "loss": 0.1917, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17795711755752563, | |
| "step": 1705, | |
| "valid_targets_mean": 4193.2, | |
| "valid_targets_min": 1270 | |
| }, | |
| { | |
| "epoch": 4.253731343283582, | |
| "grad_norm": 0.6065208601441333, | |
| "learning_rate": 1.6031286098070523e-05, | |
| "loss": 0.1901, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19792214035987854, | |
| "step": 1710, | |
| "valid_targets_mean": 3305.4, | |
| "valid_targets_min": 1130 | |
| }, | |
| { | |
| "epoch": 4.266169154228856, | |
| "grad_norm": 0.6573570535633596, | |
| "learning_rate": 1.5909755087726265e-05, | |
| "loss": 0.1921, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19432350993156433, | |
| "step": 1715, | |
| "valid_targets_mean": 4008.4, | |
| "valid_targets_min": 200 | |
| }, | |
| { | |
| "epoch": 4.278606965174129, | |
| "grad_norm": 0.561968603462814, | |
| "learning_rate": 1.5788381497513784e-05, | |
| "loss": 0.1971, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1960953325033188, | |
| "step": 1720, | |
| "valid_targets_mean": 4200.1, | |
| "valid_targets_min": 1752 | |
| }, | |
| { | |
| "epoch": 4.291044776119403, | |
| "grad_norm": 0.5705642184565684, | |
| "learning_rate": 1.5667169998705065e-05, | |
| "loss": 0.1883, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19628682732582092, | |
| "step": 1725, | |
| "valid_targets_mean": 4268.7, | |
| "valid_targets_min": 2160 | |
| }, | |
| { | |
| "epoch": 4.303482587064677, | |
| "grad_norm": 0.5543526015290202, | |
| "learning_rate": 1.5546125256333778e-05, | |
| "loss": 0.1881, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20755350589752197, | |
| "step": 1730, | |
| "valid_targets_mean": 4264.7, | |
| "valid_targets_min": 1388 | |
| }, | |
| { | |
| "epoch": 4.3159203980099505, | |
| "grad_norm": 0.5131024835801369, | |
| "learning_rate": 1.5425251929015635e-05, | |
| "loss": 0.1807, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1865994930267334, | |
| "step": 1735, | |
| "valid_targets_mean": 4723.0, | |
| "valid_targets_min": 2825 | |
| }, | |
| { | |
| "epoch": 4.3283582089552235, | |
| "grad_norm": 0.5369465865720151, | |
| "learning_rate": 1.5304554668769175e-05, | |
| "loss": 0.1817, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16630160808563232, | |
| "step": 1740, | |
| "valid_targets_mean": 4633.6, | |
| "valid_targets_min": 1557 | |
| }, | |
| { | |
| "epoch": 4.340796019900497, | |
| "grad_norm": 0.5488638067704708, | |
| "learning_rate": 1.5184038120836678e-05, | |
| "loss": 0.1855, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19109171628952026, | |
| "step": 1745, | |
| "valid_targets_mean": 4214.2, | |
| "valid_targets_min": 1109 | |
| }, | |
| { | |
| "epoch": 4.353233830845771, | |
| "grad_norm": 0.6194076089336067, | |
| "learning_rate": 1.5063706923505392e-05, | |
| "loss": 0.188, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2046584188938141, | |
| "step": 1750, | |
| "valid_targets_mean": 3853.9, | |
| "valid_targets_min": 2765 | |
| }, | |
| { | |
| "epoch": 4.365671641791045, | |
| "grad_norm": 0.5294295584701465, | |
| "learning_rate": 1.494356570792905e-05, | |
| "loss": 0.1885, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18276120722293854, | |
| "step": 1755, | |
| "valid_targets_mean": 4236.4, | |
| "valid_targets_min": 1666 | |
| }, | |
| { | |
| "epoch": 4.378109452736318, | |
| "grad_norm": 0.585302204807222, | |
| "learning_rate": 1.4823619097949584e-05, | |
| "loss": 0.1914, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19066128134727478, | |
| "step": 1760, | |
| "valid_targets_mean": 3614.5, | |
| "valid_targets_min": 2120 | |
| }, | |
| { | |
| "epoch": 4.390547263681592, | |
| "grad_norm": 0.5450990664960823, | |
| "learning_rate": 1.4703871709919217e-05, | |
| "loss": 0.1886, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1853064000606537, | |
| "step": 1765, | |
| "valid_targets_mean": 4034.2, | |
| "valid_targets_min": 1155 | |
| }, | |
| { | |
| "epoch": 4.402985074626866, | |
| "grad_norm": 0.5070494966282225, | |
| "learning_rate": 1.4584328152522762e-05, | |
| "loss": 0.183, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19450125098228455, | |
| "step": 1770, | |
| "valid_targets_mean": 4716.2, | |
| "valid_targets_min": 970 | |
| }, | |
| { | |
| "epoch": 4.41542288557214, | |
| "grad_norm": 0.5501844594868129, | |
| "learning_rate": 1.446499302660024e-05, | |
| "loss": 0.1865, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17513319849967957, | |
| "step": 1775, | |
| "valid_targets_mean": 3711.8, | |
| "valid_targets_min": 2079 | |
| }, | |
| { | |
| "epoch": 4.4278606965174125, | |
| "grad_norm": 0.591362418900047, | |
| "learning_rate": 1.4345870924969862e-05, | |
| "loss": 0.1972, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20799198746681213, | |
| "step": 1780, | |
| "valid_targets_mean": 4011.0, | |
| "valid_targets_min": 2539 | |
| }, | |
| { | |
| "epoch": 4.440298507462686, | |
| "grad_norm": 0.5193747556433063, | |
| "learning_rate": 1.4226966432251201e-05, | |
| "loss": 0.1895, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19595052301883698, | |
| "step": 1785, | |
| "valid_targets_mean": 4659.1, | |
| "valid_targets_min": 2807 | |
| }, | |
| { | |
| "epoch": 4.45273631840796, | |
| "grad_norm": 0.5569115821469196, | |
| "learning_rate": 1.4108284124688796e-05, | |
| "loss": 0.1883, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19918416440486908, | |
| "step": 1790, | |
| "valid_targets_mean": 4438.2, | |
| "valid_targets_min": 1874 | |
| }, | |
| { | |
| "epoch": 4.465174129353234, | |
| "grad_norm": 0.5283360905638507, | |
| "learning_rate": 1.3989828569976003e-05, | |
| "loss": 0.1918, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1961868703365326, | |
| "step": 1795, | |
| "valid_targets_mean": 4511.8, | |
| "valid_targets_min": 1407 | |
| }, | |
| { | |
| "epoch": 4.477611940298507, | |
| "grad_norm": 0.5532204571218482, | |
| "learning_rate": 1.3871604327079184e-05, | |
| "loss": 0.1883, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1693786382675171, | |
| "step": 1800, | |
| "valid_targets_mean": 3788.8, | |
| "valid_targets_min": 1567 | |
| }, | |
| { | |
| "epoch": 4.490049751243781, | |
| "grad_norm": 0.5359674168093468, | |
| "learning_rate": 1.37536159460623e-05, | |
| "loss": 0.1929, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.183126300573349, | |
| "step": 1805, | |
| "valid_targets_mean": 4236.6, | |
| "valid_targets_min": 1831 | |
| }, | |
| { | |
| "epoch": 4.502487562189055, | |
| "grad_norm": 0.5263751071985473, | |
| "learning_rate": 1.3635867967911734e-05, | |
| "loss": 0.1854, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18391427397727966, | |
| "step": 1810, | |
| "valid_targets_mean": 4034.3, | |
| "valid_targets_min": 2682 | |
| }, | |
| { | |
| "epoch": 4.514925373134329, | |
| "grad_norm": 0.6145126374437491, | |
| "learning_rate": 1.3518364924361564e-05, | |
| "loss": 0.1917, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18755874037742615, | |
| "step": 1815, | |
| "valid_targets_mean": 3663.2, | |
| "valid_targets_min": 941 | |
| }, | |
| { | |
| "epoch": 4.5273631840796025, | |
| "grad_norm": 0.5461908468497769, | |
| "learning_rate": 1.340111133771913e-05, | |
| "loss": 0.1942, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19034013152122498, | |
| "step": 1820, | |
| "valid_targets_mean": 4258.0, | |
| "valid_targets_min": 2231 | |
| }, | |
| { | |
| "epoch": 4.539800995024875, | |
| "grad_norm": 0.5437588969087739, | |
| "learning_rate": 1.3284111720690987e-05, | |
| "loss": 0.1919, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18442508578300476, | |
| "step": 1825, | |
| "valid_targets_mean": 3990.9, | |
| "valid_targets_min": 2466 | |
| }, | |
| { | |
| "epoch": 4.552238805970149, | |
| "grad_norm": 0.5315792588743522, | |
| "learning_rate": 1.3167370576209253e-05, | |
| "loss": 0.1874, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18224917352199554, | |
| "step": 1830, | |
| "valid_targets_mean": 4163.6, | |
| "valid_targets_min": 2181 | |
| }, | |
| { | |
| "epoch": 4.564676616915423, | |
| "grad_norm": 0.5757247651612372, | |
| "learning_rate": 1.305089239725826e-05, | |
| "loss": 0.1902, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20078767836093903, | |
| "step": 1835, | |
| "valid_targets_mean": 4150.7, | |
| "valid_targets_min": 1840 | |
| }, | |
| { | |
| "epoch": 4.577114427860696, | |
| "grad_norm": 0.5938129394057137, | |
| "learning_rate": 1.2934681666701674e-05, | |
| "loss": 0.1894, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18216751515865326, | |
| "step": 1840, | |
| "valid_targets_mean": 4025.4, | |
| "valid_targets_min": 2013 | |
| }, | |
| { | |
| "epoch": 4.58955223880597, | |
| "grad_norm": 0.6028091843290723, | |
| "learning_rate": 1.2818742857109947e-05, | |
| "loss": 0.1861, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19109377264976501, | |
| "step": 1845, | |
| "valid_targets_mean": 3510.1, | |
| "valid_targets_min": 228 | |
| }, | |
| { | |
| "epoch": 4.601990049751244, | |
| "grad_norm": 0.5776897498789417, | |
| "learning_rate": 1.270308043058816e-05, | |
| "loss": 0.1905, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18680351972579956, | |
| "step": 1850, | |
| "valid_targets_mean": 3375.8, | |
| "valid_targets_min": 1071 | |
| }, | |
| { | |
| "epoch": 4.614427860696518, | |
| "grad_norm": 0.5240221774849542, | |
| "learning_rate": 1.2587698838604357e-05, | |
| "loss": 0.1925, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1797061264514923, | |
| "step": 1855, | |
| "valid_targets_mean": 4271.2, | |
| "valid_targets_min": 1977 | |
| }, | |
| { | |
| "epoch": 4.6268656716417915, | |
| "grad_norm": 0.5639717377124501, | |
| "learning_rate": 1.2472602521818136e-05, | |
| "loss": 0.1948, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21001404523849487, | |
| "step": 1860, | |
| "valid_targets_mean": 3991.2, | |
| "valid_targets_min": 1970 | |
| }, | |
| { | |
| "epoch": 4.6393034825870645, | |
| "grad_norm": 0.5205449157527914, | |
| "learning_rate": 1.2357795909909831e-05, | |
| "loss": 0.1924, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21091000735759735, | |
| "step": 1865, | |
| "valid_targets_mean": 4543.5, | |
| "valid_targets_min": 1236 | |
| }, | |
| { | |
| "epoch": 4.651741293532338, | |
| "grad_norm": 0.5642643394569002, | |
| "learning_rate": 1.2243283421409944e-05, | |
| "loss": 0.1966, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19856856763362885, | |
| "step": 1870, | |
| "valid_targets_mean": 4086.1, | |
| "valid_targets_min": 2486 | |
| }, | |
| { | |
| "epoch": 4.664179104477612, | |
| "grad_norm": 0.5328298872378295, | |
| "learning_rate": 1.2129069463529147e-05, | |
| "loss": 0.1861, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1959315538406372, | |
| "step": 1875, | |
| "valid_targets_mean": 4242.9, | |
| "valid_targets_min": 2666 | |
| }, | |
| { | |
| "epoch": 4.676616915422885, | |
| "grad_norm": 0.553203140113736, | |
| "learning_rate": 1.2015158431988656e-05, | |
| "loss": 0.1947, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1867968738079071, | |
| "step": 1880, | |
| "valid_targets_mean": 4032.2, | |
| "valid_targets_min": 1997 | |
| }, | |
| { | |
| "epoch": 4.689054726368159, | |
| "grad_norm": 0.579092184466775, | |
| "learning_rate": 1.1901554710851022e-05, | |
| "loss": 0.2058, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20128527283668518, | |
| "step": 1885, | |
| "valid_targets_mean": 3413.8, | |
| "valid_targets_min": 973 | |
| }, | |
| { | |
| "epoch": 4.701492537313433, | |
| "grad_norm": 0.6107118718654273, | |
| "learning_rate": 1.1788262672351451e-05, | |
| "loss": 0.1835, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19431200623512268, | |
| "step": 1890, | |
| "valid_targets_mean": 3529.1, | |
| "valid_targets_min": 1539 | |
| }, | |
| { | |
| "epoch": 4.713930348258707, | |
| "grad_norm": 0.5389866374015265, | |
| "learning_rate": 1.1675286676729489e-05, | |
| "loss": 0.1941, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19274672865867615, | |
| "step": 1895, | |
| "valid_targets_mean": 4071.8, | |
| "valid_targets_min": 2330 | |
| }, | |
| { | |
| "epoch": 4.726368159203981, | |
| "grad_norm": 0.5466037683250415, | |
| "learning_rate": 1.1562631072061214e-05, | |
| "loss": 0.189, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1851121038198471, | |
| "step": 1900, | |
| "valid_targets_mean": 4066.6, | |
| "valid_targets_min": 2216 | |
| }, | |
| { | |
| "epoch": 4.7388059701492535, | |
| "grad_norm": 0.5172604730124416, | |
| "learning_rate": 1.1450300194091936e-05, | |
| "loss": 0.1922, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18047800660133362, | |
| "step": 1905, | |
| "valid_targets_mean": 4594.6, | |
| "valid_targets_min": 852 | |
| }, | |
| { | |
| "epoch": 4.751243781094527, | |
| "grad_norm": 0.5097432319490798, | |
| "learning_rate": 1.1338298366069282e-05, | |
| "loss": 0.1899, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17912769317626953, | |
| "step": 1910, | |
| "valid_targets_mean": 4337.2, | |
| "valid_targets_min": 921 | |
| }, | |
| { | |
| "epoch": 4.763681592039801, | |
| "grad_norm": 0.527120343823522, | |
| "learning_rate": 1.1226629898576818e-05, | |
| "loss": 0.1912, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22057533264160156, | |
| "step": 1915, | |
| "valid_targets_mean": 4766.2, | |
| "valid_targets_min": 2509 | |
| }, | |
| { | |
| "epoch": 4.776119402985074, | |
| "grad_norm": 0.5341316468563161, | |
| "learning_rate": 1.1115299089368163e-05, | |
| "loss": 0.195, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18534281849861145, | |
| "step": 1920, | |
| "valid_targets_mean": 3921.1, | |
| "valid_targets_min": 2193 | |
| }, | |
| { | |
| "epoch": 4.788557213930348, | |
| "grad_norm": 0.5460035866192121, | |
| "learning_rate": 1.1004310223201567e-05, | |
| "loss": 0.1834, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20613934099674225, | |
| "step": 1925, | |
| "valid_targets_mean": 4485.4, | |
| "valid_targets_min": 2111 | |
| }, | |
| { | |
| "epoch": 4.800995024875622, | |
| "grad_norm": 0.5517532913974614, | |
| "learning_rate": 1.089366757167504e-05, | |
| "loss": 0.1885, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1839025318622589, | |
| "step": 1930, | |
| "valid_targets_mean": 3908.1, | |
| "valid_targets_min": 2332 | |
| }, | |
| { | |
| "epoch": 4.813432835820896, | |
| "grad_norm": 0.529510753958286, | |
| "learning_rate": 1.0783375393061867e-05, | |
| "loss": 0.1863, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19102586805820465, | |
| "step": 1935, | |
| "valid_targets_mean": 4116.6, | |
| "valid_targets_min": 2062 | |
| }, | |
| { | |
| "epoch": 4.82587064676617, | |
| "grad_norm": 0.5194175454337181, | |
| "learning_rate": 1.0673437932146844e-05, | |
| "loss": 0.1946, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1900445818901062, | |
| "step": 1940, | |
| "valid_targets_mean": 5161.1, | |
| "valid_targets_min": 2400 | |
| }, | |
| { | |
| "epoch": 4.838308457711443, | |
| "grad_norm": 0.5563197452474851, | |
| "learning_rate": 1.05638594200628e-05, | |
| "loss": 0.1906, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1914559304714203, | |
| "step": 1945, | |
| "valid_targets_mean": 3629.1, | |
| "valid_targets_min": 1381 | |
| }, | |
| { | |
| "epoch": 4.850746268656716, | |
| "grad_norm": 0.5125531536441792, | |
| "learning_rate": 1.0454644074127818e-05, | |
| "loss": 0.1822, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17317280173301697, | |
| "step": 1950, | |
| "valid_targets_mean": 4402.2, | |
| "valid_targets_min": 1409 | |
| }, | |
| { | |
| "epoch": 4.86318407960199, | |
| "grad_norm": 0.5514274931255213, | |
| "learning_rate": 1.0345796097682896e-05, | |
| "loss": 0.1912, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1964603066444397, | |
| "step": 1955, | |
| "valid_targets_mean": 4132.4, | |
| "valid_targets_min": 2369 | |
| }, | |
| { | |
| "epoch": 4.875621890547263, | |
| "grad_norm": 0.542246915785968, | |
| "learning_rate": 1.023731967993018e-05, | |
| "loss": 0.1949, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19377470016479492, | |
| "step": 1960, | |
| "valid_targets_mean": 4014.6, | |
| "valid_targets_min": 2105 | |
| }, | |
| { | |
| "epoch": 4.888059701492537, | |
| "grad_norm": 0.5081914366033573, | |
| "learning_rate": 1.0129218995771766e-05, | |
| "loss": 0.1865, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17151615023612976, | |
| "step": 1965, | |
| "valid_targets_mean": 3851.6, | |
| "valid_targets_min": 196 | |
| }, | |
| { | |
| "epoch": 4.900497512437811, | |
| "grad_norm": 0.5987169529933362, | |
| "learning_rate": 1.002149820564897e-05, | |
| "loss": 0.1888, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2023550570011139, | |
| "step": 1970, | |
| "valid_targets_mean": 3578.8, | |
| "valid_targets_min": 1055 | |
| }, | |
| { | |
| "epoch": 4.912935323383085, | |
| "grad_norm": 0.5582434934098669, | |
| "learning_rate": 9.914161455382215e-06, | |
| "loss": 0.1929, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1893100142478943, | |
| "step": 1975, | |
| "valid_targets_mean": 3755.3, | |
| "valid_targets_min": 1619 | |
| }, | |
| { | |
| "epoch": 4.925373134328359, | |
| "grad_norm": 0.524299212994188, | |
| "learning_rate": 9.807212876011528e-06, | |
| "loss": 0.1922, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19495365023612976, | |
| "step": 1980, | |
| "valid_targets_mean": 4259.8, | |
| "valid_targets_min": 317 | |
| }, | |
| { | |
| "epoch": 4.937810945273632, | |
| "grad_norm": 0.5458717305766948, | |
| "learning_rate": 9.700656583637484e-06, | |
| "loss": 0.1947, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18475620448589325, | |
| "step": 1985, | |
| "valid_targets_mean": 3955.7, | |
| "valid_targets_min": 1312 | |
| }, | |
| { | |
| "epoch": 4.9502487562189055, | |
| "grad_norm": 0.5988339564997255, | |
| "learning_rate": 9.594496679262822e-06, | |
| "loss": 0.1909, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20608876645565033, | |
| "step": 1990, | |
| "valid_targets_mean": 3641.8, | |
| "valid_targets_min": 1811 | |
| }, | |
| { | |
| "epoch": 4.962686567164179, | |
| "grad_norm": 0.5140756054780553, | |
| "learning_rate": 9.488737248634603e-06, | |
| "loss": 0.1953, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21765117347240448, | |
| "step": 1995, | |
| "valid_targets_mean": 5012.4, | |
| "valid_targets_min": 2130 | |
| }, | |
| { | |
| "epoch": 4.975124378109452, | |
| "grad_norm": 0.5750911579001797, | |
| "learning_rate": 9.383382362086959e-06, | |
| "loss": 0.1951, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19809523224830627, | |
| "step": 2000, | |
| "valid_targets_mean": 3847.8, | |
| "valid_targets_min": 1889 | |
| }, | |
| { | |
| "epoch": 4.987562189054726, | |
| "grad_norm": 0.5469643211901647, | |
| "learning_rate": 9.27843607438447e-06, | |
| "loss": 0.1951, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19203123450279236, | |
| "step": 2005, | |
| "valid_targets_mean": 3831.5, | |
| "valid_targets_min": 1988 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.5145051979175338, | |
| "learning_rate": 9.173902424566057e-06, | |
| "loss": 0.1912, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19573429226875305, | |
| "step": 2010, | |
| "valid_targets_mean": 4327.5, | |
| "valid_targets_min": 2066 | |
| }, | |
| { | |
| "epoch": 5.012437810945274, | |
| "grad_norm": 0.48975408551998934, | |
| "learning_rate": 9.06978543578957e-06, | |
| "loss": 0.1798, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17476843297481537, | |
| "step": 2015, | |
| "valid_targets_mean": 4369.6, | |
| "valid_targets_min": 1170 | |
| }, | |
| { | |
| "epoch": 5.024875621890548, | |
| "grad_norm": 1.1782763959202165, | |
| "learning_rate": 8.966089115176945e-06, | |
| "loss": 0.1797, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17875716090202332, | |
| "step": 2020, | |
| "valid_targets_mean": 4059.2, | |
| "valid_targets_min": 1149 | |
| }, | |
| { | |
| "epoch": 5.037313432835821, | |
| "grad_norm": 0.5373043390636772, | |
| "learning_rate": 8.862817453659968e-06, | |
| "loss": 0.1839, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1860535442829132, | |
| "step": 2025, | |
| "valid_targets_mean": 4277.5, | |
| "valid_targets_min": 2642 | |
| }, | |
| { | |
| "epoch": 5.0497512437810945, | |
| "grad_norm": 0.5331643182583649, | |
| "learning_rate": 8.759974425826696e-06, | |
| "loss": 0.1802, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16362561285495758, | |
| "step": 2030, | |
| "valid_targets_mean": 3765.5, | |
| "valid_targets_min": 2097 | |
| }, | |
| { | |
| "epoch": 5.062189054726368, | |
| "grad_norm": 0.544019833677962, | |
| "learning_rate": 8.657563989768467e-06, | |
| "loss": 0.1885, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2051907181739807, | |
| "step": 2035, | |
| "valid_targets_mean": 4519.1, | |
| "valid_targets_min": 2674 | |
| }, | |
| { | |
| "epoch": 5.074626865671641, | |
| "grad_norm": 0.62327450747123, | |
| "learning_rate": 8.555590086927602e-06, | |
| "loss": 0.1885, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19175231456756592, | |
| "step": 2040, | |
| "valid_targets_mean": 3685.4, | |
| "valid_targets_min": 853 | |
| }, | |
| { | |
| "epoch": 5.087064676616915, | |
| "grad_norm": 0.5308764481118281, | |
| "learning_rate": 8.454056641945665e-06, | |
| "loss": 0.1859, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1750047206878662, | |
| "step": 2045, | |
| "valid_targets_mean": 4141.5, | |
| "valid_targets_min": 2422 | |
| }, | |
| { | |
| "epoch": 5.099502487562189, | |
| "grad_norm": 0.5185262701369572, | |
| "learning_rate": 8.352967562512448e-06, | |
| "loss": 0.1811, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1702880859375, | |
| "step": 2050, | |
| "valid_targets_mean": 4598.6, | |
| "valid_targets_min": 1330 | |
| }, | |
| { | |
| "epoch": 5.111940298507463, | |
| "grad_norm": 0.5829644925735894, | |
| "learning_rate": 8.252326739215568e-06, | |
| "loss": 0.1698, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1757672280073166, | |
| "step": 2055, | |
| "valid_targets_mean": 3722.4, | |
| "valid_targets_min": 1948 | |
| }, | |
| { | |
| "epoch": 5.124378109452737, | |
| "grad_norm": 0.5984769834446014, | |
| "learning_rate": 8.152138045390725e-06, | |
| "loss": 0.1792, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19725893437862396, | |
| "step": 2060, | |
| "valid_targets_mean": 3535.5, | |
| "valid_targets_min": 1839 | |
| }, | |
| { | |
| "epoch": 5.13681592039801, | |
| "grad_norm": 0.4884187116919498, | |
| "learning_rate": 8.052405336972659e-06, | |
| "loss": 0.1708, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16476622223854065, | |
| "step": 2065, | |
| "valid_targets_mean": 4477.9, | |
| "valid_targets_min": 1939 | |
| }, | |
| { | |
| "epoch": 5.149253731343284, | |
| "grad_norm": 0.8229280153452522, | |
| "learning_rate": 7.953132452346693e-06, | |
| "loss": 0.1735, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16132110357284546, | |
| "step": 2070, | |
| "valid_targets_mean": 3924.3, | |
| "valid_targets_min": 1278 | |
| }, | |
| { | |
| "epoch": 5.161691542288557, | |
| "grad_norm": 0.4724798431111937, | |
| "learning_rate": 7.854323212201047e-06, | |
| "loss": 0.1829, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17464956641197205, | |
| "step": 2075, | |
| "valid_targets_mean": 5130.9, | |
| "valid_targets_min": 3182 | |
| }, | |
| { | |
| "epoch": 5.174129353233831, | |
| "grad_norm": 0.6676453133494172, | |
| "learning_rate": 7.75598141937981e-06, | |
| "loss": 0.1842, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17902524769306183, | |
| "step": 2080, | |
| "valid_targets_mean": 3899.2, | |
| "valid_targets_min": 1769 | |
| }, | |
| { | |
| "epoch": 5.186567164179104, | |
| "grad_norm": 0.581722657791125, | |
| "learning_rate": 7.658110858736523e-06, | |
| "loss": 0.1839, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1738991141319275, | |
| "step": 2085, | |
| "valid_targets_mean": 4717.8, | |
| "valid_targets_min": 1597 | |
| }, | |
| { | |
| "epoch": 5.199004975124378, | |
| "grad_norm": 0.6119229068009544, | |
| "learning_rate": 7.560715296988554e-06, | |
| "loss": 0.167, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16843843460083008, | |
| "step": 2090, | |
| "valid_targets_mean": 3586.2, | |
| "valid_targets_min": 1305 | |
| }, | |
| { | |
| "epoch": 5.211442786069652, | |
| "grad_norm": 0.6296917134765727, | |
| "learning_rate": 7.463798482572122e-06, | |
| "loss": 0.1735, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17999431490898132, | |
| "step": 2095, | |
| "valid_targets_mean": 3528.2, | |
| "valid_targets_min": 1513 | |
| }, | |
| { | |
| "epoch": 5.223880597014926, | |
| "grad_norm": 0.545873150726275, | |
| "learning_rate": 7.36736414549802e-06, | |
| "loss": 0.1718, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18782277405261993, | |
| "step": 2100, | |
| "valid_targets_mean": 4359.8, | |
| "valid_targets_min": 2033 | |
| }, | |
| { | |
| "epoch": 5.236318407960199, | |
| "grad_norm": 0.5785180015454172, | |
| "learning_rate": 7.271415997208093e-06, | |
| "loss": 0.1849, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1869148463010788, | |
| "step": 2105, | |
| "valid_targets_mean": 3707.5, | |
| "valid_targets_min": 2328 | |
| }, | |
| { | |
| "epoch": 5.248756218905473, | |
| "grad_norm": 0.5764954136094295, | |
| "learning_rate": 7.17595773043233e-06, | |
| "loss": 0.1695, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18479612469673157, | |
| "step": 2110, | |
| "valid_targets_mean": 4026.6, | |
| "valid_targets_min": 2210 | |
| }, | |
| { | |
| "epoch": 5.2611940298507465, | |
| "grad_norm": 0.5204559401076309, | |
| "learning_rate": 7.080993019046827e-06, | |
| "loss": 0.1822, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17545653879642487, | |
| "step": 2115, | |
| "valid_targets_mean": 4619.4, | |
| "valid_targets_min": 2520 | |
| }, | |
| { | |
| "epoch": 5.273631840796019, | |
| "grad_norm": 0.5629684650741945, | |
| "learning_rate": 6.986525517932321e-06, | |
| "loss": 0.1773, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16429266333580017, | |
| "step": 2120, | |
| "valid_targets_mean": 3826.1, | |
| "valid_targets_min": 1140 | |
| }, | |
| { | |
| "epoch": 5.286069651741293, | |
| "grad_norm": 0.5207487254296341, | |
| "learning_rate": 6.892558862833569e-06, | |
| "loss": 0.19, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17473994195461273, | |
| "step": 2125, | |
| "valid_targets_mean": 4406.2, | |
| "valid_targets_min": 2004 | |
| }, | |
| { | |
| "epoch": 5.298507462686567, | |
| "grad_norm": 0.6137205942170112, | |
| "learning_rate": 6.799096670219396e-06, | |
| "loss": 0.1832, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18868446350097656, | |
| "step": 2130, | |
| "valid_targets_mean": 4076.2, | |
| "valid_targets_min": 1472 | |
| }, | |
| { | |
| "epoch": 5.310945273631841, | |
| "grad_norm": 0.4900273933795682, | |
| "learning_rate": 6.706142537143518e-06, | |
| "loss": 0.1778, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17624013125896454, | |
| "step": 2135, | |
| "valid_targets_mean": 5214.7, | |
| "valid_targets_min": 1883 | |
| }, | |
| { | |
| "epoch": 5.323383084577115, | |
| "grad_norm": 0.5288471175274457, | |
| "learning_rate": 6.613700041106119e-06, | |
| "loss": 0.1701, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18341976404190063, | |
| "step": 2140, | |
| "valid_targets_mean": 4289.6, | |
| "valid_targets_min": 1156 | |
| }, | |
| { | |
| "epoch": 5.335820895522388, | |
| "grad_norm": 0.5519799885784733, | |
| "learning_rate": 6.521772739916137e-06, | |
| "loss": 0.1824, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16220718622207642, | |
| "step": 2145, | |
| "valid_targets_mean": 4097.6, | |
| "valid_targets_min": 1881 | |
| }, | |
| { | |
| "epoch": 5.348258706467662, | |
| "grad_norm": 0.641394208147078, | |
| "learning_rate": 6.430364171554329e-06, | |
| "loss": 0.1789, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1815165877342224, | |
| "step": 2150, | |
| "valid_targets_mean": 3265.1, | |
| "valid_targets_min": 222 | |
| }, | |
| { | |
| "epoch": 5.3606965174129355, | |
| "grad_norm": 0.5017060911996063, | |
| "learning_rate": 6.33947785403716e-06, | |
| "loss": 0.1768, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15326166152954102, | |
| "step": 2155, | |
| "valid_targets_mean": 4282.4, | |
| "valid_targets_min": 1306 | |
| }, | |
| { | |
| "epoch": 5.373134328358209, | |
| "grad_norm": 0.5629109675071312, | |
| "learning_rate": 6.249117285281348e-06, | |
| "loss": 0.1981, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19157464802265167, | |
| "step": 2160, | |
| "valid_targets_mean": 4362.0, | |
| "valid_targets_min": 1910 | |
| }, | |
| { | |
| "epoch": 5.385572139303482, | |
| "grad_norm": 0.5573924452067587, | |
| "learning_rate": 6.159285942969266e-06, | |
| "loss": 0.1741, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18228746950626373, | |
| "step": 2165, | |
| "valid_targets_mean": 3975.6, | |
| "valid_targets_min": 2189 | |
| }, | |
| { | |
| "epoch": 5.398009950248756, | |
| "grad_norm": 0.5588862231529654, | |
| "learning_rate": 6.0699872844151e-06, | |
| "loss": 0.1909, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20605233311653137, | |
| "step": 2170, | |
| "valid_targets_mean": 4185.6, | |
| "valid_targets_min": 1682 | |
| }, | |
| { | |
| "epoch": 5.41044776119403, | |
| "grad_norm": 0.5819160509013732, | |
| "learning_rate": 5.9812247464317685e-06, | |
| "loss": 0.1873, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1820629984140396, | |
| "step": 2175, | |
| "valid_targets_mean": 3783.1, | |
| "valid_targets_min": 941 | |
| }, | |
| { | |
| "epoch": 5.422885572139304, | |
| "grad_norm": 0.5625161894644168, | |
| "learning_rate": 5.893001745198692e-06, | |
| "loss": 0.1789, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17581865191459656, | |
| "step": 2180, | |
| "valid_targets_mean": 3561.9, | |
| "valid_targets_min": 2275 | |
| }, | |
| { | |
| "epoch": 5.435323383084577, | |
| "grad_norm": 0.5946540446785107, | |
| "learning_rate": 5.805321676130262e-06, | |
| "loss": 0.1841, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1829841583967209, | |
| "step": 2185, | |
| "valid_targets_mean": 3860.6, | |
| "valid_targets_min": 2281 | |
| }, | |
| { | |
| "epoch": 5.447761194029851, | |
| "grad_norm": 0.5531530928449198, | |
| "learning_rate": 5.718187913745199e-06, | |
| "loss": 0.1755, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1640455424785614, | |
| "step": 2190, | |
| "valid_targets_mean": 4010.9, | |
| "valid_targets_min": 1509 | |
| }, | |
| { | |
| "epoch": 5.460199004975125, | |
| "grad_norm": 0.5893338522051409, | |
| "learning_rate": 5.631603811536668e-06, | |
| "loss": 0.1892, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1882874071598053, | |
| "step": 2195, | |
| "valid_targets_mean": 3535.2, | |
| "valid_targets_min": 1706 | |
| }, | |
| { | |
| "epoch": 5.472636815920398, | |
| "grad_norm": 0.6073509265616175, | |
| "learning_rate": 5.545572701843216e-06, | |
| "loss": 0.1782, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18738335371017456, | |
| "step": 2200, | |
| "valid_targets_mean": 4425.8, | |
| "valid_targets_min": 2013 | |
| }, | |
| { | |
| "epoch": 5.485074626865671, | |
| "grad_norm": 0.5310642730413633, | |
| "learning_rate": 5.4600978957205135e-06, | |
| "loss": 0.1885, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.168120339512825, | |
| "step": 2205, | |
| "valid_targets_mean": 3844.4, | |
| "valid_targets_min": 1734 | |
| }, | |
| { | |
| "epoch": 5.497512437810945, | |
| "grad_norm": 0.555820405124083, | |
| "learning_rate": 5.375182682813929e-06, | |
| "loss": 0.1795, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1976512372493744, | |
| "step": 2210, | |
| "valid_targets_mean": 3939.1, | |
| "valid_targets_min": 1724 | |
| }, | |
| { | |
| "epoch": 5.509950248756219, | |
| "grad_norm": 0.5509023684550313, | |
| "learning_rate": 5.290830331231933e-06, | |
| "loss": 0.1743, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1776900589466095, | |
| "step": 2215, | |
| "valid_targets_mean": 4289.1, | |
| "valid_targets_min": 1658 | |
| }, | |
| { | |
| "epoch": 5.522388059701493, | |
| "grad_norm": 0.5113496796090481, | |
| "learning_rate": 5.2070440874202925e-06, | |
| "loss": 0.1728, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17290140688419342, | |
| "step": 2220, | |
| "valid_targets_mean": 4486.5, | |
| "valid_targets_min": 1644 | |
| }, | |
| { | |
| "epoch": 5.534825870646766, | |
| "grad_norm": 0.5419706723077836, | |
| "learning_rate": 5.123827176037146e-06, | |
| "loss": 0.1798, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18113742768764496, | |
| "step": 2225, | |
| "valid_targets_mean": 4176.4, | |
| "valid_targets_min": 1097 | |
| }, | |
| { | |
| "epoch": 5.54726368159204, | |
| "grad_norm": 0.5714071952278112, | |
| "learning_rate": 5.041182799828888e-06, | |
| "loss": 0.1764, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16627778112888336, | |
| "step": 2230, | |
| "valid_targets_mean": 3533.5, | |
| "valid_targets_min": 1673 | |
| }, | |
| { | |
| "epoch": 5.559701492537314, | |
| "grad_norm": 0.5631880598259031, | |
| "learning_rate": 4.959114139506909e-06, | |
| "loss": 0.1703, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16612115502357483, | |
| "step": 2235, | |
| "valid_targets_mean": 3968.1, | |
| "valid_targets_min": 2003 | |
| }, | |
| { | |
| "epoch": 5.572139303482587, | |
| "grad_norm": 0.5954530663996263, | |
| "learning_rate": 4.877624353625197e-06, | |
| "loss": 0.177, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18126454949378967, | |
| "step": 2240, | |
| "valid_targets_mean": 3962.8, | |
| "valid_targets_min": 2013 | |
| }, | |
| { | |
| "epoch": 5.58457711442786, | |
| "grad_norm": 0.5504249794441187, | |
| "learning_rate": 4.7967165784587284e-06, | |
| "loss": 0.1832, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19141951203346252, | |
| "step": 2245, | |
| "valid_targets_mean": 4483.5, | |
| "valid_targets_min": 1041 | |
| }, | |
| { | |
| "epoch": 5.597014925373134, | |
| "grad_norm": 0.6159216942243276, | |
| "learning_rate": 4.71639392788281e-06, | |
| "loss": 0.184, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18392285704612732, | |
| "step": 2250, | |
| "valid_targets_mean": 3742.2, | |
| "valid_targets_min": 1851 | |
| }, | |
| { | |
| "epoch": 5.609452736318408, | |
| "grad_norm": 0.6025337544936586, | |
| "learning_rate": 4.6366594932532285e-06, | |
| "loss": 0.1781, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19583985209465027, | |
| "step": 2255, | |
| "valid_targets_mean": 3859.6, | |
| "valid_targets_min": 1696 | |
| }, | |
| { | |
| "epoch": 5.621890547263682, | |
| "grad_norm": 0.5398477942967179, | |
| "learning_rate": 4.557516343287251e-06, | |
| "loss": 0.173, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1676681786775589, | |
| "step": 2260, | |
| "valid_targets_mean": 3899.4, | |
| "valid_targets_min": 448 | |
| }, | |
| { | |
| "epoch": 5.634328358208955, | |
| "grad_norm": 0.5503845662196336, | |
| "learning_rate": 4.4789675239455385e-06, | |
| "loss": 0.1785, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17880871891975403, | |
| "step": 2265, | |
| "valid_targets_mean": 3832.9, | |
| "valid_targets_min": 1595 | |
| }, | |
| { | |
| "epoch": 5.646766169154229, | |
| "grad_norm": 0.5103257331663372, | |
| "learning_rate": 4.401016058314913e-06, | |
| "loss": 0.1732, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17229273915290833, | |
| "step": 2270, | |
| "valid_targets_mean": 4335.1, | |
| "valid_targets_min": 1417 | |
| }, | |
| { | |
| "epoch": 5.659203980099503, | |
| "grad_norm": 0.5094018756134759, | |
| "learning_rate": 4.3236649464919986e-06, | |
| "loss": 0.1789, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16803838312625885, | |
| "step": 2275, | |
| "valid_targets_mean": 4230.3, | |
| "valid_targets_min": 2119 | |
| }, | |
| { | |
| "epoch": 5.6716417910447765, | |
| "grad_norm": 0.5319736807745282, | |
| "learning_rate": 4.246917165467799e-06, | |
| "loss": 0.1816, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19026246666908264, | |
| "step": 2280, | |
| "valid_targets_mean": 4721.9, | |
| "valid_targets_min": 2278 | |
| }, | |
| { | |
| "epoch": 5.6840796019900495, | |
| "grad_norm": 0.5869835916274105, | |
| "learning_rate": 4.170775669013041e-06, | |
| "loss": 0.1741, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18648019433021545, | |
| "step": 2285, | |
| "valid_targets_mean": 3507.3, | |
| "valid_targets_min": 2093 | |
| }, | |
| { | |
| "epoch": 5.696517412935323, | |
| "grad_norm": 0.5334102609956934, | |
| "learning_rate": 4.095243387564593e-06, | |
| "loss": 0.1732, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18063801527023315, | |
| "step": 2290, | |
| "valid_targets_mean": 4553.8, | |
| "valid_targets_min": 2042 | |
| }, | |
| { | |
| "epoch": 5.708955223880597, | |
| "grad_norm": 0.5694193724204722, | |
| "learning_rate": 4.020323228112604e-06, | |
| "loss": 0.1927, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1846069097518921, | |
| "step": 2295, | |
| "valid_targets_mean": 4064.2, | |
| "valid_targets_min": 1370 | |
| }, | |
| { | |
| "epoch": 5.721393034825871, | |
| "grad_norm": 0.5452728006991922, | |
| "learning_rate": 3.9460180740886625e-06, | |
| "loss": 0.1775, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17648135125637054, | |
| "step": 2300, | |
| "valid_targets_mean": 4065.1, | |
| "valid_targets_min": 2102 | |
| }, | |
| { | |
| "epoch": 5.733830845771144, | |
| "grad_norm": 0.5669921219625754, | |
| "learning_rate": 3.872330785254803e-06, | |
| "loss": 0.1784, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18603132665157318, | |
| "step": 2305, | |
| "valid_targets_mean": 3896.8, | |
| "valid_targets_min": 1993 | |
| }, | |
| { | |
| "epoch": 5.746268656716418, | |
| "grad_norm": 0.5623459293769465, | |
| "learning_rate": 3.7992641975934595e-06, | |
| "loss": 0.1897, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1948518455028534, | |
| "step": 2310, | |
| "valid_targets_mean": 4391.6, | |
| "valid_targets_min": 1950 | |
| }, | |
| { | |
| "epoch": 5.758706467661692, | |
| "grad_norm": 0.5330298017777138, | |
| "learning_rate": 3.7268211231983185e-06, | |
| "loss": 0.1821, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1722792387008667, | |
| "step": 2315, | |
| "valid_targets_mean": 4296.8, | |
| "valid_targets_min": 2070 | |
| }, | |
| { | |
| "epoch": 5.7711442786069655, | |
| "grad_norm": 0.6779738524621803, | |
| "learning_rate": 3.6550043501660736e-06, | |
| "loss": 0.1829, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17438241839408875, | |
| "step": 2320, | |
| "valid_targets_mean": 3621.8, | |
| "valid_targets_min": 1850 | |
| }, | |
| { | |
| "epoch": 5.7835820895522385, | |
| "grad_norm": 0.6308227572737273, | |
| "learning_rate": 3.583816642489113e-06, | |
| "loss": 0.1784, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18285539746284485, | |
| "step": 2325, | |
| "valid_targets_mean": 3364.1, | |
| "valid_targets_min": 1441 | |
| }, | |
| { | |
| "epoch": 5.796019900497512, | |
| "grad_norm": 0.5466656922264711, | |
| "learning_rate": 3.513260739949196e-06, | |
| "loss": 0.1785, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.160654678940773, | |
| "step": 2330, | |
| "valid_targets_mean": 3844.6, | |
| "valid_targets_min": 2190 | |
| }, | |
| { | |
| "epoch": 5.808457711442786, | |
| "grad_norm": 0.5473761351277354, | |
| "learning_rate": 3.4433393580119436e-06, | |
| "loss": 0.1821, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17680670320987701, | |
| "step": 2335, | |
| "valid_targets_mean": 3974.6, | |
| "valid_targets_min": 1692 | |
| }, | |
| { | |
| "epoch": 5.82089552238806, | |
| "grad_norm": 0.549317498826441, | |
| "learning_rate": 3.3740551877223647e-06, | |
| "loss": 0.1813, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17757967114448547, | |
| "step": 2340, | |
| "valid_targets_mean": 3935.1, | |
| "valid_targets_min": 2554 | |
| }, | |
| { | |
| "epoch": 5.833333333333333, | |
| "grad_norm": 0.5224545749951351, | |
| "learning_rate": 3.30541089560128e-06, | |
| "loss": 0.1744, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16635212302207947, | |
| "step": 2345, | |
| "valid_targets_mean": 4472.9, | |
| "valid_targets_min": 2049 | |
| }, | |
| { | |
| "epoch": 5.845771144278607, | |
| "grad_norm": 0.5477440996260752, | |
| "learning_rate": 3.2374091235426918e-06, | |
| "loss": 0.1759, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16781067848205566, | |
| "step": 2350, | |
| "valid_targets_mean": 3725.6, | |
| "valid_targets_min": 2020 | |
| }, | |
| { | |
| "epoch": 5.858208955223881, | |
| "grad_norm": 0.6293176391340207, | |
| "learning_rate": 3.1700524887121188e-06, | |
| "loss": 0.1786, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.193925142288208, | |
| "step": 2355, | |
| "valid_targets_mean": 3524.7, | |
| "valid_targets_min": 1801 | |
| }, | |
| { | |
| "epoch": 5.870646766169155, | |
| "grad_norm": 0.5849523473688234, | |
| "learning_rate": 3.103343583445848e-06, | |
| "loss": 0.1794, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18686872720718384, | |
| "step": 2360, | |
| "valid_targets_mean": 3800.3, | |
| "valid_targets_min": 2279 | |
| }, | |
| { | |
| "epoch": 5.883084577114428, | |
| "grad_norm": 0.546446441498615, | |
| "learning_rate": 3.037284975151182e-06, | |
| "loss": 0.179, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.174645334482193, | |
| "step": 2365, | |
| "valid_targets_mean": 4354.5, | |
| "valid_targets_min": 2242 | |
| }, | |
| { | |
| "epoch": 5.895522388059701, | |
| "grad_norm": 0.6294684870590153, | |
| "learning_rate": 2.9718792062076264e-06, | |
| "loss": 0.1773, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16901081800460815, | |
| "step": 2370, | |
| "valid_targets_mean": 3182.9, | |
| "valid_targets_min": 960 | |
| }, | |
| { | |
| "epoch": 5.907960199004975, | |
| "grad_norm": 0.545529756238248, | |
| "learning_rate": 2.9071287938690298e-06, | |
| "loss": 0.1844, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19544681906700134, | |
| "step": 2375, | |
| "valid_targets_mean": 4001.1, | |
| "valid_targets_min": 915 | |
| }, | |
| { | |
| "epoch": 5.920398009950249, | |
| "grad_norm": 0.5761291485378018, | |
| "learning_rate": 2.843036230166718e-06, | |
| "loss": 0.1749, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17292159795761108, | |
| "step": 2380, | |
| "valid_targets_mean": 3945.6, | |
| "valid_targets_min": 1765 | |
| }, | |
| { | |
| "epoch": 5.932835820895522, | |
| "grad_norm": 0.5200481295816748, | |
| "learning_rate": 2.779603981813568e-06, | |
| "loss": 0.1672, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1651022732257843, | |
| "step": 2385, | |
| "valid_targets_mean": 4494.5, | |
| "valid_targets_min": 860 | |
| }, | |
| { | |
| "epoch": 5.945273631840796, | |
| "grad_norm": 0.5690349697582774, | |
| "learning_rate": 2.7168344901091016e-06, | |
| "loss": 0.1855, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17301228642463684, | |
| "step": 2390, | |
| "valid_targets_mean": 4024.2, | |
| "valid_targets_min": 2194 | |
| }, | |
| { | |
| "epoch": 5.95771144278607, | |
| "grad_norm": 0.5426341756011361, | |
| "learning_rate": 2.6547301708454877e-06, | |
| "loss": 0.184, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19446370005607605, | |
| "step": 2395, | |
| "valid_targets_mean": 4282.0, | |
| "valid_targets_min": 1255 | |
| }, | |
| { | |
| "epoch": 5.970149253731344, | |
| "grad_norm": 0.6008319137622877, | |
| "learning_rate": 2.5932934142145906e-06, | |
| "loss": 0.1695, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17186178267002106, | |
| "step": 2400, | |
| "valid_targets_mean": 4482.7, | |
| "valid_targets_min": 1574 | |
| }, | |
| { | |
| "epoch": 5.982587064676617, | |
| "grad_norm": 0.5514765334050616, | |
| "learning_rate": 2.5325265847159798e-06, | |
| "loss": 0.1813, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.167934849858284, | |
| "step": 2405, | |
| "valid_targets_mean": 3971.0, | |
| "valid_targets_min": 1741 | |
| }, | |
| { | |
| "epoch": 5.9950248756218905, | |
| "grad_norm": 0.5743859815483373, | |
| "learning_rate": 2.472432021065918e-06, | |
| "loss": 0.175, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17497491836547852, | |
| "step": 2410, | |
| "valid_targets_mean": 4070.7, | |
| "valid_targets_min": 1109 | |
| }, | |
| { | |
| "epoch": 6.007462686567164, | |
| "grad_norm": 0.5125255425588743, | |
| "learning_rate": 2.4130120361073716e-06, | |
| "loss": 0.1705, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1637192666530609, | |
| "step": 2415, | |
| "valid_targets_mean": 4396.6, | |
| "valid_targets_min": 1850 | |
| }, | |
| { | |
| "epoch": 6.019900497512438, | |
| "grad_norm": 0.5724876553676014, | |
| "learning_rate": 2.3542689167209563e-06, | |
| "loss": 0.1782, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17785580456256866, | |
| "step": 2420, | |
| "valid_targets_mean": 3476.4, | |
| "valid_targets_min": 1979 | |
| }, | |
| { | |
| "epoch": 6.032338308457711, | |
| "grad_norm": 0.5879062795130893, | |
| "learning_rate": 2.2962049237369643e-06, | |
| "loss": 0.1727, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16875743865966797, | |
| "step": 2425, | |
| "valid_targets_mean": 3416.4, | |
| "valid_targets_min": 1696 | |
| }, | |
| { | |
| "epoch": 6.044776119402985, | |
| "grad_norm": 0.5588828300455307, | |
| "learning_rate": 2.238822291848344e-06, | |
| "loss": 0.1746, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17179271578788757, | |
| "step": 2430, | |
| "valid_targets_mean": 3605.9, | |
| "valid_targets_min": 1418 | |
| }, | |
| { | |
| "epoch": 6.057213930348259, | |
| "grad_norm": 0.5295142510715316, | |
| "learning_rate": 2.182123229524673e-06, | |
| "loss": 0.1741, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1968148648738861, | |
| "step": 2435, | |
| "valid_targets_mean": 4323.1, | |
| "valid_targets_min": 2235 | |
| }, | |
| { | |
| "epoch": 6.069651741293533, | |
| "grad_norm": 0.6168460567108859, | |
| "learning_rate": 2.1261099189271792e-06, | |
| "loss": 0.1715, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1779354065656662, | |
| "step": 2440, | |
| "valid_targets_mean": 3457.3, | |
| "valid_targets_min": 1629 | |
| }, | |
| { | |
| "epoch": 6.082089552238806, | |
| "grad_norm": 0.5621656463077643, | |
| "learning_rate": 2.070784515824753e-06, | |
| "loss": 0.1753, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.159359872341156, | |
| "step": 2445, | |
| "valid_targets_mean": 4689.1, | |
| "valid_targets_min": 2366 | |
| }, | |
| { | |
| "epoch": 6.0945273631840795, | |
| "grad_norm": 0.5487542240351966, | |
| "learning_rate": 2.016149149510975e-06, | |
| "loss": 0.1738, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16500002145767212, | |
| "step": 2450, | |
| "valid_targets_mean": 4167.4, | |
| "valid_targets_min": 2067 | |
| }, | |
| { | |
| "epoch": 6.106965174129353, | |
| "grad_norm": 0.5686330625834844, | |
| "learning_rate": 1.9622059227221825e-06, | |
| "loss": 0.1733, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17375610768795013, | |
| "step": 2455, | |
| "valid_targets_mean": 3917.0, | |
| "valid_targets_min": 1876 | |
| }, | |
| { | |
| "epoch": 6.119402985074627, | |
| "grad_norm": 0.5233617770605969, | |
| "learning_rate": 1.9089569115565052e-06, | |
| "loss": 0.1753, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16628089547157288, | |
| "step": 2460, | |
| "valid_targets_mean": 4445.4, | |
| "valid_targets_min": 2398 | |
| }, | |
| { | |
| "epoch": 6.1318407960199, | |
| "grad_norm": 0.5914278966941198, | |
| "learning_rate": 1.8564041653940123e-06, | |
| "loss": 0.1717, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16870436072349548, | |
| "step": 2465, | |
| "valid_targets_mean": 4021.4, | |
| "valid_targets_min": 2352 | |
| }, | |
| { | |
| "epoch": 6.144278606965174, | |
| "grad_norm": 0.5693055333467447, | |
| "learning_rate": 1.8045497068177975e-06, | |
| "loss": 0.1798, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1836708039045334, | |
| "step": 2470, | |
| "valid_targets_mean": 3719.6, | |
| "valid_targets_min": 1833 | |
| }, | |
| { | |
| "epoch": 6.156716417910448, | |
| "grad_norm": 0.652372737993125, | |
| "learning_rate": 1.7533955315361551e-06, | |
| "loss": 0.1756, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1608482003211975, | |
| "step": 2475, | |
| "valid_targets_mean": 3182.6, | |
| "valid_targets_min": 2079 | |
| }, | |
| { | |
| "epoch": 6.169154228855722, | |
| "grad_norm": 0.533482071857129, | |
| "learning_rate": 1.7029436083057715e-06, | |
| "loss": 0.1785, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16928087174892426, | |
| "step": 2480, | |
| "valid_targets_mean": 4331.8, | |
| "valid_targets_min": 1303 | |
| }, | |
| { | |
| "epoch": 6.181592039800995, | |
| "grad_norm": 0.5464490007192712, | |
| "learning_rate": 1.6531958788559465e-06, | |
| "loss": 0.1713, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16682176291942596, | |
| "step": 2485, | |
| "valid_targets_mean": 3949.8, | |
| "valid_targets_min": 1289 | |
| }, | |
| { | |
| "epoch": 6.1940298507462686, | |
| "grad_norm": 0.5623683876558216, | |
| "learning_rate": 1.6041542578138746e-06, | |
| "loss": 0.1749, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18091854453086853, | |
| "step": 2490, | |
| "valid_targets_mean": 4234.9, | |
| "valid_targets_min": 1575 | |
| }, | |
| { | |
| "epoch": 6.206467661691542, | |
| "grad_norm": 0.5238269588411919, | |
| "learning_rate": 1.5558206326309511e-06, | |
| "loss": 0.1807, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16900426149368286, | |
| "step": 2495, | |
| "valid_targets_mean": 4697.7, | |
| "valid_targets_min": 1876 | |
| }, | |
| { | |
| "epoch": 6.218905472636816, | |
| "grad_norm": 0.5716740018491285, | |
| "learning_rate": 1.5081968635101097e-06, | |
| "loss": 0.1739, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18251916766166687, | |
| "step": 2500, | |
| "valid_targets_mean": 3668.0, | |
| "valid_targets_min": 1509 | |
| }, | |
| { | |
| "epoch": 6.231343283582089, | |
| "grad_norm": 0.5363180031909048, | |
| "learning_rate": 1.4612847833342759e-06, | |
| "loss": 0.1768, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1818065345287323, | |
| "step": 2505, | |
| "valid_targets_mean": 4083.4, | |
| "valid_targets_min": 1806 | |
| }, | |
| { | |
| "epoch": 6.243781094527363, | |
| "grad_norm": 0.6429529839019501, | |
| "learning_rate": 1.4150861975957786e-06, | |
| "loss": 0.1699, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18589231371879578, | |
| "step": 2510, | |
| "valid_targets_mean": 3552.3, | |
| "valid_targets_min": 1876 | |
| }, | |
| { | |
| "epoch": 6.256218905472637, | |
| "grad_norm": 0.4975492608341589, | |
| "learning_rate": 1.3696028843268993e-06, | |
| "loss": 0.1677, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15583984553813934, | |
| "step": 2515, | |
| "valid_targets_mean": 4623.1, | |
| "valid_targets_min": 1947 | |
| }, | |
| { | |
| "epoch": 6.268656716417911, | |
| "grad_norm": 0.5345304367276562, | |
| "learning_rate": 1.3248365940314067e-06, | |
| "loss": 0.1729, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1705108880996704, | |
| "step": 2520, | |
| "valid_targets_mean": 4393.9, | |
| "valid_targets_min": 1990 | |
| }, | |
| { | |
| "epoch": 6.281094527363184, | |
| "grad_norm": 0.5373113443994114, | |
| "learning_rate": 1.2807890496172103e-06, | |
| "loss": 0.1713, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1581442952156067, | |
| "step": 2525, | |
| "valid_targets_mean": 4299.2, | |
| "valid_targets_min": 1423 | |
| }, | |
| { | |
| "epoch": 6.293532338308458, | |
| "grad_norm": 0.514081511794592, | |
| "learning_rate": 1.237461946330054e-06, | |
| "loss": 0.1739, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15877807140350342, | |
| "step": 2530, | |
| "valid_targets_mean": 4525.9, | |
| "valid_targets_min": 2502 | |
| }, | |
| { | |
| "epoch": 6.3059701492537314, | |
| "grad_norm": 0.560992308158361, | |
| "learning_rate": 1.1948569516882503e-06, | |
| "loss": 0.1706, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16327780485153198, | |
| "step": 2535, | |
| "valid_targets_mean": 4197.6, | |
| "valid_targets_min": 2315 | |
| }, | |
| { | |
| "epoch": 6.318407960199005, | |
| "grad_norm": 0.5265783790926104, | |
| "learning_rate": 1.1529757054185176e-06, | |
| "loss": 0.174, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17675918340682983, | |
| "step": 2540, | |
| "valid_targets_mean": 4462.6, | |
| "valid_targets_min": 1880 | |
| }, | |
| { | |
| "epoch": 6.330845771144278, | |
| "grad_norm": 0.5416867779723658, | |
| "learning_rate": 1.111819819392872e-06, | |
| "loss": 0.1768, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17835187911987305, | |
| "step": 2545, | |
| "valid_targets_mean": 4455.2, | |
| "valid_targets_min": 1971 | |
| }, | |
| { | |
| "epoch": 6.343283582089552, | |
| "grad_norm": 0.5881401941231864, | |
| "learning_rate": 1.0713908775665827e-06, | |
| "loss": 0.1752, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16701434552669525, | |
| "step": 2550, | |
| "valid_targets_mean": 3613.5, | |
| "valid_targets_min": 1130 | |
| }, | |
| { | |
| "epoch": 6.355721393034826, | |
| "grad_norm": 0.5504035044096116, | |
| "learning_rate": 1.0316904359172297e-06, | |
| "loss": 0.1789, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16181015968322754, | |
| "step": 2555, | |
| "valid_targets_mean": 3846.8, | |
| "valid_targets_min": 1976 | |
| }, | |
| { | |
| "epoch": 6.3681592039801, | |
| "grad_norm": 0.566462536612002, | |
| "learning_rate": 9.92720022384792e-07, | |
| "loss": 0.1668, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1657314896583557, | |
| "step": 2560, | |
| "valid_targets_mean": 4052.9, | |
| "valid_targets_min": 2216 | |
| }, | |
| { | |
| "epoch": 6.380597014925373, | |
| "grad_norm": 0.5700973608301066, | |
| "learning_rate": 9.544811368128703e-07, | |
| "loss": 0.1645, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17204223573207855, | |
| "step": 2565, | |
| "valid_targets_mean": 4212.5, | |
| "valid_targets_min": 1450 | |
| }, | |
| { | |
| "epoch": 6.393034825870647, | |
| "grad_norm": 0.5934427501781491, | |
| "learning_rate": 9.169752508909413e-07, | |
| "loss": 0.1724, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17751480638980865, | |
| "step": 2570, | |
| "valid_targets_mean": 3961.2, | |
| "valid_targets_min": 2303 | |
| }, | |
| { | |
| "epoch": 6.4054726368159205, | |
| "grad_norm": 0.5691373349683894, | |
| "learning_rate": 8.802038080977305e-07, | |
| "loss": 0.1796, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17300844192504883, | |
| "step": 2575, | |
| "valid_targets_mean": 3961.4, | |
| "valid_targets_min": 2205 | |
| }, | |
| { | |
| "epoch": 6.417910447761194, | |
| "grad_norm": 0.6340863357974218, | |
| "learning_rate": 8.441682236456472e-07, | |
| "loss": 0.1735, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17821021378040314, | |
| "step": 2580, | |
| "valid_targets_mean": 3086.1, | |
| "valid_targets_min": 963 | |
| }, | |
| { | |
| "epoch": 6.430348258706467, | |
| "grad_norm": 0.579617792634382, | |
| "learning_rate": 8.088698844263243e-07, | |
| "loss": 0.1704, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16770654916763306, | |
| "step": 2585, | |
| "valid_targets_mean": 3542.5, | |
| "valid_targets_min": 2121 | |
| }, | |
| { | |
| "epoch": 6.442786069651741, | |
| "grad_norm": 0.5395856283137651, | |
| "learning_rate": 7.743101489572491e-07, | |
| "loss": 0.1797, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19851364195346832, | |
| "step": 2590, | |
| "valid_targets_mean": 4706.3, | |
| "valid_targets_min": 1763 | |
| }, | |
| { | |
| "epoch": 6.455223880597015, | |
| "grad_norm": 0.5238594468934492, | |
| "learning_rate": 7.404903473294612e-07, | |
| "loss": 0.1686, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15577292442321777, | |
| "step": 2595, | |
| "valid_targets_mean": 4178.8, | |
| "valid_targets_min": 1773 | |
| }, | |
| { | |
| "epoch": 6.467661691542289, | |
| "grad_norm": 0.632754230583396, | |
| "learning_rate": 7.074117811563619e-07, | |
| "loss": 0.1698, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17483484745025635, | |
| "step": 2600, | |
| "valid_targets_mean": 3031.8, | |
| "valid_targets_min": 344 | |
| }, | |
| { | |
| "epoch": 6.480099502487562, | |
| "grad_norm": 0.5671291151476788, | |
| "learning_rate": 6.750757235236461e-07, | |
| "loss": 0.1739, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17332680523395538, | |
| "step": 2605, | |
| "valid_targets_mean": 3747.0, | |
| "valid_targets_min": 1149 | |
| }, | |
| { | |
| "epoch": 6.492537313432836, | |
| "grad_norm": 0.6093460900077653, | |
| "learning_rate": 6.434834189402716e-07, | |
| "loss": 0.175, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18352875113487244, | |
| "step": 2610, | |
| "valid_targets_mean": 3692.0, | |
| "valid_targets_min": 1907 | |
| }, | |
| { | |
| "epoch": 6.5049751243781095, | |
| "grad_norm": 0.574114276787967, | |
| "learning_rate": 6.126360832905831e-07, | |
| "loss": 0.1819, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18569818139076233, | |
| "step": 2615, | |
| "valid_targets_mean": 3639.9, | |
| "valid_targets_min": 1544 | |
| }, | |
| { | |
| "epoch": 6.517412935323383, | |
| "grad_norm": 0.5146553892569846, | |
| "learning_rate": 5.825349037875106e-07, | |
| "loss": 0.1779, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16424797475337982, | |
| "step": 2620, | |
| "valid_targets_mean": 4595.1, | |
| "valid_targets_min": 1975 | |
| }, | |
| { | |
| "epoch": 6.529850746268656, | |
| "grad_norm": 0.5252261416414551, | |
| "learning_rate": 5.531810389268732e-07, | |
| "loss": 0.1601, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15995575487613678, | |
| "step": 2625, | |
| "valid_targets_mean": 4328.8, | |
| "valid_targets_min": 1661 | |
| }, | |
| { | |
| "epoch": 6.54228855721393, | |
| "grad_norm": 0.5685319838301356, | |
| "learning_rate": 5.245756184428041e-07, | |
| "loss": 0.1744, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18082097172737122, | |
| "step": 2630, | |
| "valid_targets_mean": 4233.4, | |
| "valid_targets_min": 1916 | |
| }, | |
| { | |
| "epoch": 6.554726368159204, | |
| "grad_norm": 0.5341982017596594, | |
| "learning_rate": 4.967197432642579e-07, | |
| "loss": 0.176, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17173001170158386, | |
| "step": 2635, | |
| "valid_targets_mean": 4326.6, | |
| "valid_targets_min": 2650 | |
| }, | |
| { | |
| "epoch": 6.567164179104478, | |
| "grad_norm": 0.6401995160355601, | |
| "learning_rate": 4.69614485472647e-07, | |
| "loss": 0.1765, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17902860045433044, | |
| "step": 2640, | |
| "valid_targets_mean": 3158.5, | |
| "valid_targets_min": 191 | |
| }, | |
| { | |
| "epoch": 6.579601990049751, | |
| "grad_norm": 0.5667151478749745, | |
| "learning_rate": 4.432608882605771e-07, | |
| "loss": 0.1724, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17057490348815918, | |
| "step": 2645, | |
| "valid_targets_mean": 3628.1, | |
| "valid_targets_min": 2236 | |
| }, | |
| { | |
| "epoch": 6.592039800995025, | |
| "grad_norm": 0.5485021773091285, | |
| "learning_rate": 4.1765996589170353e-07, | |
| "loss": 0.1807, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.179216206073761, | |
| "step": 2650, | |
| "valid_targets_mean": 4233.1, | |
| "valid_targets_min": 1682 | |
| }, | |
| { | |
| "epoch": 6.604477611940299, | |
| "grad_norm": 0.519072304817314, | |
| "learning_rate": 3.928127036616869e-07, | |
| "loss": 0.1752, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18321016430854797, | |
| "step": 2655, | |
| "valid_targets_mean": 4888.4, | |
| "valid_targets_min": 1732 | |
| }, | |
| { | |
| "epoch": 6.616915422885572, | |
| "grad_norm": 0.5786793950538549, | |
| "learning_rate": 3.687200578602812e-07, | |
| "loss": 0.1723, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17171373963356018, | |
| "step": 2660, | |
| "valid_targets_mean": 4240.3, | |
| "valid_targets_min": 1550 | |
| }, | |
| { | |
| "epoch": 6.629353233830845, | |
| "grad_norm": 0.5589571697627682, | |
| "learning_rate": 3.453829557345212e-07, | |
| "loss": 0.1626, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1652020364999771, | |
| "step": 2665, | |
| "valid_targets_mean": 3949.5, | |
| "valid_targets_min": 222 | |
| }, | |
| { | |
| "epoch": 6.641791044776119, | |
| "grad_norm": 0.5511197102525519, | |
| "learning_rate": 3.228022954530463e-07, | |
| "loss": 0.1778, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1774759292602539, | |
| "step": 2670, | |
| "valid_targets_mean": 4197.3, | |
| "valid_targets_min": 428 | |
| }, | |
| { | |
| "epoch": 6.654228855721393, | |
| "grad_norm": 0.5864879776585974, | |
| "learning_rate": 3.009789460715218e-07, | |
| "loss": 0.1902, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16247567534446716, | |
| "step": 2675, | |
| "valid_targets_mean": 3430.1, | |
| "valid_targets_min": 926 | |
| }, | |
| { | |
| "epoch": 6.666666666666667, | |
| "grad_norm": 0.5572934258361881, | |
| "learning_rate": 2.799137474991942e-07, | |
| "loss": 0.1784, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18331031501293182, | |
| "step": 2680, | |
| "valid_targets_mean": 3912.9, | |
| "valid_targets_min": 1883 | |
| }, | |
| { | |
| "epoch": 6.67910447761194, | |
| "grad_norm": 0.5559560602699174, | |
| "learning_rate": 2.5960751046657296e-07, | |
| "loss": 0.1733, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17147520184516907, | |
| "step": 2685, | |
| "valid_targets_mean": 4053.0, | |
| "valid_targets_min": 1895 | |
| }, | |
| { | |
| "epoch": 6.691542288557214, | |
| "grad_norm": 0.5575426373568327, | |
| "learning_rate": 2.400610164942241e-07, | |
| "loss": 0.1679, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16202571988105774, | |
| "step": 2690, | |
| "valid_targets_mean": 4100.9, | |
| "valid_targets_min": 2530 | |
| }, | |
| { | |
| "epoch": 6.703980099502488, | |
| "grad_norm": 0.5562136135591627, | |
| "learning_rate": 2.2127501786268546e-07, | |
| "loss": 0.169, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15551359951496124, | |
| "step": 2695, | |
| "valid_targets_mean": 4108.9, | |
| "valid_targets_min": 2029 | |
| }, | |
| { | |
| "epoch": 6.7164179104477615, | |
| "grad_norm": 0.5198839932899565, | |
| "learning_rate": 2.0325023758352545e-07, | |
| "loss": 0.1741, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17046837508678436, | |
| "step": 2700, | |
| "valid_targets_mean": 4698.8, | |
| "valid_targets_min": 443 | |
| }, | |
| { | |
| "epoch": 6.7288557213930345, | |
| "grad_norm": 0.5938343168062021, | |
| "learning_rate": 1.859873693715075e-07, | |
| "loss": 0.1756, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1763916015625, | |
| "step": 2705, | |
| "valid_targets_mean": 4298.5, | |
| "valid_targets_min": 1533 | |
| }, | |
| { | |
| "epoch": 6.741293532338308, | |
| "grad_norm": 0.5495808847371538, | |
| "learning_rate": 1.6948707761789807e-07, | |
| "loss": 0.1751, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17928853631019592, | |
| "step": 2710, | |
| "valid_targets_mean": 4255.4, | |
| "valid_targets_min": 1937 | |
| }, | |
| { | |
| "epoch": 6.753731343283582, | |
| "grad_norm": 0.6045663915002609, | |
| "learning_rate": 1.5374999736488927e-07, | |
| "loss": 0.1703, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1754097044467926, | |
| "step": 2715, | |
| "valid_targets_mean": 3834.8, | |
| "valid_targets_min": 1388 | |
| }, | |
| { | |
| "epoch": 6.766169154228856, | |
| "grad_norm": 0.5553708025379133, | |
| "learning_rate": 1.3877673428116302e-07, | |
| "loss": 0.179, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19348856806755066, | |
| "step": 2720, | |
| "valid_targets_mean": 4249.7, | |
| "valid_targets_min": 3032 | |
| }, | |
| { | |
| "epoch": 6.778606965174129, | |
| "grad_norm": 0.629594904612257, | |
| "learning_rate": 1.245678646385784e-07, | |
| "loss": 0.1668, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17597973346710205, | |
| "step": 2725, | |
| "valid_targets_mean": 3250.0, | |
| "valid_targets_min": 941 | |
| }, | |
| { | |
| "epoch": 6.791044776119403, | |
| "grad_norm": 0.5963075561787823, | |
| "learning_rate": 1.1112393529000288e-07, | |
| "loss": 0.1738, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1901685893535614, | |
| "step": 2730, | |
| "valid_targets_mean": 3795.7, | |
| "valid_targets_min": 2540 | |
| }, | |
| { | |
| "epoch": 6.803482587064677, | |
| "grad_norm": 0.5609315377417177, | |
| "learning_rate": 9.844546364824459e-08, | |
| "loss": 0.1721, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17084071040153503, | |
| "step": 2735, | |
| "valid_targets_mean": 3876.4, | |
| "valid_targets_min": 1109 | |
| }, | |
| { | |
| "epoch": 6.8159203980099505, | |
| "grad_norm": 0.5043275099355118, | |
| "learning_rate": 8.653293766615945e-08, | |
| "loss": 0.1702, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16672548651695251, | |
| "step": 2740, | |
| "valid_targets_mean": 4784.5, | |
| "valid_targets_min": 1905 | |
| }, | |
| { | |
| "epoch": 6.8283582089552235, | |
| "grad_norm": 0.6108276462294715, | |
| "learning_rate": 7.538681581785945e-08, | |
| "loss": 0.176, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18459632992744446, | |
| "step": 2745, | |
| "valid_targets_mean": 3379.5, | |
| "valid_targets_min": 852 | |
| }, | |
| { | |
| "epoch": 6.840796019900497, | |
| "grad_norm": 0.5499395587166259, | |
| "learning_rate": 6.50075270810735e-08, | |
| "loss": 0.1778, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16682520508766174, | |
| "step": 2750, | |
| "valid_targets_mean": 4087.9, | |
| "valid_targets_min": 1786 | |
| }, | |
| { | |
| "epoch": 6.853233830845771, | |
| "grad_norm": 0.5762377369105647, | |
| "learning_rate": 5.539547092063391e-08, | |
| "loss": 0.1763, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16152673959732056, | |
| "step": 2755, | |
| "valid_targets_mean": 3740.1, | |
| "valid_targets_min": 862 | |
| }, | |
| { | |
| "epoch": 6.865671641791045, | |
| "grad_norm": 0.6162665248042457, | |
| "learning_rate": 4.655101727310651e-08, | |
| "loss": 0.168, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17481867969036102, | |
| "step": 2760, | |
| "valid_targets_mean": 3653.2, | |
| "valid_targets_min": 2037 | |
| }, | |
| { | |
| "epoch": 6.878109452736318, | |
| "grad_norm": 0.5537138349691514, | |
| "learning_rate": 3.847450653254425e-08, | |
| "loss": 0.1831, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19818416237831116, | |
| "step": 2765, | |
| "valid_targets_mean": 4187.0, | |
| "valid_targets_min": 2037 | |
| }, | |
| { | |
| "epoch": 6.890547263681592, | |
| "grad_norm": 0.5538646057123637, | |
| "learning_rate": 3.1166249537402104e-08, | |
| "loss": 0.1747, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1677202731370926, | |
| "step": 2770, | |
| "valid_targets_mean": 4246.5, | |
| "valid_targets_min": 1887 | |
| }, | |
| { | |
| "epoch": 6.902985074626866, | |
| "grad_norm": 0.5458989012174927, | |
| "learning_rate": 2.4626527558551106e-08, | |
| "loss": 0.1684, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16506440937519073, | |
| "step": 2775, | |
| "valid_targets_mean": 4147.2, | |
| "valid_targets_min": 1054 | |
| }, | |
| { | |
| "epoch": 6.91542288557214, | |
| "grad_norm": 0.5428319526484747, | |
| "learning_rate": 1.885559228847811e-08, | |
| "loss": 0.1753, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16628624498844147, | |
| "step": 2780, | |
| "valid_targets_mean": 4325.8, | |
| "valid_targets_min": 2837 | |
| }, | |
| { | |
| "epoch": 6.927860696517413, | |
| "grad_norm": 0.5215809087161279, | |
| "learning_rate": 1.385366583158243e-08, | |
| "loss": 0.1767, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19465038180351257, | |
| "step": 2785, | |
| "valid_targets_mean": 4546.7, | |
| "valid_targets_min": 2100 | |
| }, | |
| { | |
| "epoch": 6.940298507462686, | |
| "grad_norm": 0.5104612235616637, | |
| "learning_rate": 9.620940695633797e-09, | |
| "loss": 0.183, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1838764250278473, | |
| "step": 2790, | |
| "valid_targets_mean": 4532.4, | |
| "valid_targets_min": 2044 | |
| }, | |
| { | |
| "epoch": 6.95273631840796, | |
| "grad_norm": 0.5508437850202684, | |
| "learning_rate": 6.15757978435827e-09, | |
| "loss": 0.1744, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17743362486362457, | |
| "step": 2795, | |
| "valid_targets_mean": 4273.7, | |
| "valid_targets_min": 2596 | |
| }, | |
| { | |
| "epoch": 6.965174129353234, | |
| "grad_norm": 0.5364623622959384, | |
| "learning_rate": 3.463716391176597e-09, | |
| "loss": 0.1799, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2021486759185791, | |
| "step": 2800, | |
| "valid_targets_mean": 4783.9, | |
| "valid_targets_min": 1910 | |
| }, | |
| { | |
| "epoch": 6.977611940298507, | |
| "grad_norm": 0.5071144651033146, | |
| "learning_rate": 1.5394541940705332e-09, | |
| "loss": 0.1691, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16921557486057281, | |
| "step": 2805, | |
| "valid_targets_mean": 4896.3, | |
| "valid_targets_min": 1519 | |
| }, | |
| { | |
| "epoch": 6.990049751243781, | |
| "grad_norm": 0.5760603012343628, | |
| "learning_rate": 3.848672515882612e-10, | |
| "loss": 0.1735, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16937696933746338, | |
| "step": 2810, | |
| "valid_targets_mean": 3735.2, | |
| "valid_targets_min": 2097 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17162613570690155, | |
| "step": 2814, | |
| "total_flos": 1157609273950208.0, | |
| "train_loss": 0.22598929736066897, | |
| "train_runtime": 20554.2284, | |
| "train_samples_per_second": 2.189, | |
| "train_steps_per_second": 0.137, | |
| "valid_targets_mean": 3578.4, | |
| "valid_targets_min": 1983 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 2814, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 7, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1157609273950208.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |