diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,3677 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 1652, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.021231422505307854, + "grad_norm": 6.601170763883668, + "learning_rate": 9.638554216867472e-07, + "loss": 0.5153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3147011399269104, + "step": 5, + "valid_targets_mean": 14395.1, + "valid_targets_min": 6135 + }, + { + "epoch": 0.04246284501061571, + "grad_norm": 4.640678277810576, + "learning_rate": 2.168674698795181e-06, + "loss": 0.4921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18654188513755798, + "step": 10, + "valid_targets_mean": 12533.2, + "valid_targets_min": 4349 + }, + { + "epoch": 0.06369426751592357, + "grad_norm": 2.8761928107591204, + "learning_rate": 3.3734939759036146e-06, + "loss": 0.4234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22491103410720825, + "step": 15, + "valid_targets_mean": 13778.4, + "valid_targets_min": 6404 + }, + { + "epoch": 0.08492569002123142, + "grad_norm": 2.0507756278474343, + "learning_rate": 4.578313253012049e-06, + "loss": 0.4198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2335529774427414, + "step": 20, + "valid_targets_mean": 13837.5, + "valid_targets_min": 3655 + }, + { + "epoch": 0.10615711252653928, + "grad_norm": 1.4535039164298584, + "learning_rate": 5.783132530120482e-06, + "loss": 0.4121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17189857363700867, + "step": 25, + "valid_targets_mean": 12292.6, + "valid_targets_min": 6259 + }, + { + "epoch": 0.12738853503184713, + "grad_norm": 0.7914310934537628, + "learning_rate": 6.987951807228917e-06, + "loss": 0.3847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24835364520549774, + "step": 30, + "valid_targets_mean": 14466.2, + "valid_targets_min": 9464 + }, + { + "epoch": 0.14861995753715498, + "grad_norm": 0.5293723876639078, + "learning_rate": 8.19277108433735e-06, + "loss": 0.3696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21188116073608398, + "step": 35, + "valid_targets_mean": 14101.8, + "valid_targets_min": 4063 + }, + { + "epoch": 0.16985138004246284, + "grad_norm": 0.5249820883384718, + "learning_rate": 9.397590361445785e-06, + "loss": 0.3606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17040115594863892, + "step": 40, + "valid_targets_mean": 14958.1, + "valid_targets_min": 6110 + }, + { + "epoch": 0.1910828025477707, + "grad_norm": 0.4219272827083708, + "learning_rate": 1.0602409638554219e-05, + "loss": 0.3188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16639268398284912, + "step": 45, + "valid_targets_mean": 12699.5, + "valid_targets_min": 5064 + }, + { + "epoch": 0.21231422505307856, + "grad_norm": 0.38858708893114424, + "learning_rate": 1.1807228915662651e-05, + "loss": 0.3358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18469196557998657, + "step": 50, + "valid_targets_mean": 12852.6, + "valid_targets_min": 4287 + }, + { + "epoch": 0.23354564755838642, + "grad_norm": 0.3799894773689016, + "learning_rate": 1.3012048192771085e-05, + "loss": 0.3134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20896856486797333, + "step": 55, + "valid_targets_mean": 15256.2, + "valid_targets_min": 5458 + }, + { + "epoch": 0.25477707006369427, + "grad_norm": 0.319379398181401, + "learning_rate": 1.4216867469879519e-05, + "loss": 0.2825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09708239883184433, + "step": 60, + "valid_targets_mean": 14549.4, + "valid_targets_min": 6224 + }, + { + "epoch": 0.2760084925690021, + "grad_norm": 0.2771017285361385, + "learning_rate": 1.5421686746987955e-05, + "loss": 0.269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16080784797668457, + "step": 65, + "valid_targets_mean": 15810.6, + "valid_targets_min": 8701 + }, + { + "epoch": 0.29723991507430997, + "grad_norm": 0.290052395391973, + "learning_rate": 1.6626506024096387e-05, + "loss": 0.2876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14474868774414062, + "step": 70, + "valid_targets_mean": 13943.5, + "valid_targets_min": 8170 + }, + { + "epoch": 0.3184713375796178, + "grad_norm": 0.2887366598094193, + "learning_rate": 1.783132530120482e-05, + "loss": 0.3045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20289857685565948, + "step": 75, + "valid_targets_mean": 19152.2, + "valid_targets_min": 8288 + }, + { + "epoch": 0.33970276008492567, + "grad_norm": 0.24501889780498082, + "learning_rate": 1.9036144578313255e-05, + "loss": 0.2576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14412395656108856, + "step": 80, + "valid_targets_mean": 16776.8, + "valid_targets_min": 6292 + }, + { + "epoch": 0.3609341825902335, + "grad_norm": 0.31757555588060904, + "learning_rate": 2.0240963855421687e-05, + "loss": 0.2864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15807807445526123, + "step": 85, + "valid_targets_mean": 11288.1, + "valid_targets_min": 5740 + }, + { + "epoch": 0.3821656050955414, + "grad_norm": 0.21005685787572534, + "learning_rate": 2.1445783132530123e-05, + "loss": 0.2537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08626846969127655, + "step": 90, + "valid_targets_mean": 15819.5, + "valid_targets_min": 7497 + }, + { + "epoch": 0.4033970276008493, + "grad_norm": 0.2942954097586065, + "learning_rate": 2.265060240963856e-05, + "loss": 0.2792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14525042474269867, + "step": 95, + "valid_targets_mean": 14950.8, + "valid_targets_min": 7206 + }, + { + "epoch": 0.42462845010615713, + "grad_norm": 0.297024377935559, + "learning_rate": 2.3855421686746988e-05, + "loss": 0.2496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1313462257385254, + "step": 100, + "valid_targets_mean": 12276.5, + "valid_targets_min": 4481 + }, + { + "epoch": 0.445859872611465, + "grad_norm": 0.3083155449002965, + "learning_rate": 2.5060240963855423e-05, + "loss": 0.2674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13870102167129517, + "step": 105, + "valid_targets_mean": 8789.2, + "valid_targets_min": 4863 + }, + { + "epoch": 0.46709129511677283, + "grad_norm": 0.22405694574565216, + "learning_rate": 2.6265060240963856e-05, + "loss": 0.2563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11005185544490814, + "step": 110, + "valid_targets_mean": 19546.9, + "valid_targets_min": 9253 + }, + { + "epoch": 0.4883227176220807, + "grad_norm": 0.32976545776333965, + "learning_rate": 2.746987951807229e-05, + "loss": 0.2934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1696811020374298, + "step": 115, + "valid_targets_mean": 11419.8, + "valid_targets_min": 4938 + }, + { + "epoch": 0.5095541401273885, + "grad_norm": 0.26726017461928253, + "learning_rate": 2.8674698795180727e-05, + "loss": 0.2538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0898926854133606, + "step": 120, + "valid_targets_mean": 14191.1, + "valid_targets_min": 3141 + }, + { + "epoch": 0.5307855626326964, + "grad_norm": 0.26693132304388195, + "learning_rate": 2.9879518072289156e-05, + "loss": 0.2489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10769972205162048, + "step": 125, + "valid_targets_mean": 15435.2, + "valid_targets_min": 5449 + }, + { + "epoch": 0.5520169851380042, + "grad_norm": 0.29793681607777117, + "learning_rate": 3.108433734939759e-05, + "loss": 0.256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13548800349235535, + "step": 130, + "valid_targets_mean": 12220.0, + "valid_targets_min": 4963 + }, + { + "epoch": 0.5732484076433121, + "grad_norm": 0.2843851218180784, + "learning_rate": 3.228915662650603e-05, + "loss": 0.2449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08166906237602234, + "step": 135, + "valid_targets_mean": 16374.8, + "valid_targets_min": 7174 + }, + { + "epoch": 0.5944798301486199, + "grad_norm": 0.2537876339931588, + "learning_rate": 3.3493975903614457e-05, + "loss": 0.2345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11751864105463028, + "step": 140, + "valid_targets_mean": 10767.6, + "valid_targets_min": 900 + }, + { + "epoch": 0.6157112526539278, + "grad_norm": 0.26983132136557225, + "learning_rate": 3.4698795180722896e-05, + "loss": 0.2754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10696007311344147, + "step": 145, + "valid_targets_mean": 15567.0, + "valid_targets_min": 3478 + }, + { + "epoch": 0.6369426751592356, + "grad_norm": 0.2874455255323601, + "learning_rate": 3.590361445783133e-05, + "loss": 0.2422, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10694894194602966, + "step": 150, + "valid_targets_mean": 14445.2, + "valid_targets_min": 5102 + }, + { + "epoch": 0.6581740976645435, + "grad_norm": 0.36018854059753647, + "learning_rate": 3.710843373493976e-05, + "loss": 0.2485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16881927847862244, + "step": 155, + "valid_targets_mean": 10835.9, + "valid_targets_min": 1377 + }, + { + "epoch": 0.6794055201698513, + "grad_norm": 0.3128091862915849, + "learning_rate": 3.83132530120482e-05, + "loss": 0.2405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10742859542369843, + "step": 160, + "valid_targets_mean": 12899.6, + "valid_targets_min": 8041 + }, + { + "epoch": 0.7006369426751592, + "grad_norm": 0.3176035358807374, + "learning_rate": 3.9518072289156625e-05, + "loss": 0.28, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10753054916858673, + "step": 165, + "valid_targets_mean": 14364.8, + "valid_targets_min": 2986 + }, + { + "epoch": 0.721868365180467, + "grad_norm": 0.27568705715038133, + "learning_rate": 3.9999597743398453e-05, + "loss": 0.2259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1222403347492218, + "step": 170, + "valid_targets_mean": 15836.6, + "valid_targets_min": 8080 + }, + { + "epoch": 0.7430997876857749, + "grad_norm": 0.3595846231890364, + "learning_rate": 3.999713956720898e-05, + "loss": 0.2501, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14652863144874573, + "step": 175, + "valid_targets_mean": 17202.6, + "valid_targets_min": 9673 + }, + { + "epoch": 0.7643312101910829, + "grad_norm": 0.35289985950885067, + "learning_rate": 3.9992446965056756e-05, + "loss": 0.2561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16108444333076477, + "step": 180, + "valid_targets_mean": 14388.9, + "valid_targets_min": 4858 + }, + { + "epoch": 0.7855626326963907, + "grad_norm": 0.33155413679799534, + "learning_rate": 3.998552046128038e-05, + "loss": 0.2604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18604812026023865, + "step": 185, + "valid_targets_mean": 17653.5, + "valid_targets_min": 12562 + }, + { + "epoch": 0.8067940552016986, + "grad_norm": 0.26974442765891954, + "learning_rate": 3.997636082982853e-05, + "loss": 0.2231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11307717114686966, + "step": 190, + "valid_targets_mean": 14695.4, + "valid_targets_min": 1868 + }, + { + "epoch": 0.8280254777070064, + "grad_norm": 0.4484022169366936, + "learning_rate": 3.9964969094173506e-05, + "loss": 0.2456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13788184523582458, + "step": 195, + "valid_targets_mean": 11258.2, + "valid_targets_min": 5920 + }, + { + "epoch": 0.8492569002123143, + "grad_norm": 0.31884550186901484, + "learning_rate": 3.995134652719684e-05, + "loss": 0.2378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17065517604351044, + "step": 200, + "valid_targets_mean": 15263.8, + "valid_targets_min": 7771 + }, + { + "epoch": 0.8704883227176221, + "grad_norm": 0.2912088961412526, + "learning_rate": 3.993549465104712e-05, + "loss": 0.212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1016058549284935, + "step": 205, + "valid_targets_mean": 11488.8, + "valid_targets_min": 2254 + }, + { + "epoch": 0.89171974522293, + "grad_norm": 1.2538232123110695, + "learning_rate": 3.991741523696984e-05, + "loss": 0.2266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0836310163140297, + "step": 210, + "valid_targets_mean": 18902.0, + "valid_targets_min": 7919 + }, + { + "epoch": 0.9129511677282378, + "grad_norm": 0.3347545564566702, + "learning_rate": 3.989711030510954e-05, + "loss": 0.2398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12700827419757843, + "step": 215, + "valid_targets_mean": 11326.8, + "valid_targets_min": 5084 + }, + { + "epoch": 0.9341825902335457, + "grad_norm": 0.2971192130089368, + "learning_rate": 3.987458212428406e-05, + "loss": 0.2243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1275290548801422, + "step": 220, + "valid_targets_mean": 14325.8, + "valid_targets_min": 5076 + }, + { + "epoch": 0.9554140127388535, + "grad_norm": 0.33426267528626336, + "learning_rate": 3.984983321173101e-05, + "loss": 0.2403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15455971658229828, + "step": 225, + "valid_targets_mean": 15615.0, + "valid_targets_min": 6466 + }, + { + "epoch": 0.9766454352441614, + "grad_norm": 0.3247808671944862, + "learning_rate": 3.9822866332826555e-05, + "loss": 0.2246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11736075580120087, + "step": 230, + "valid_targets_mean": 17059.8, + "valid_targets_min": 7224 + }, + { + "epoch": 0.9978768577494692, + "grad_norm": 0.2812386799146817, + "learning_rate": 3.9793684500776356e-05, + "loss": 0.2331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1330154538154602, + "step": 235, + "valid_targets_mean": 13021.2, + "valid_targets_min": 5174 + }, + { + "epoch": 1.0169851380042463, + "grad_norm": 0.2755139866589168, + "learning_rate": 3.976229097627892e-05, + "loss": 0.2037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10062160342931747, + "step": 240, + "valid_targets_mean": 15395.4, + "valid_targets_min": 7250 + }, + { + "epoch": 1.0382165605095541, + "grad_norm": 0.30337736123776227, + "learning_rate": 3.972868926716127e-05, + "loss": 0.1971, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08376499265432358, + "step": 245, + "valid_targets_mean": 17409.0, + "valid_targets_min": 7023 + }, + { + "epoch": 1.059447983014862, + "grad_norm": 0.2915807595114085, + "learning_rate": 3.969288312798693e-05, + "loss": 0.2454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11872377246618271, + "step": 250, + "valid_targets_mean": 14989.5, + "valid_targets_min": 6930 + }, + { + "epoch": 1.0806794055201698, + "grad_norm": 0.27726744741730325, + "learning_rate": 3.965487655963647e-05, + "loss": 0.2257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11883805692195892, + "step": 255, + "valid_targets_mean": 13760.0, + "valid_targets_min": 5931 + }, + { + "epoch": 1.1019108280254777, + "grad_norm": 0.29991341277236133, + "learning_rate": 3.961467380886042e-05, + "loss": 0.2189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08793854713439941, + "step": 260, + "valid_targets_mean": 11854.8, + "valid_targets_min": 7223 + }, + { + "epoch": 1.1231422505307855, + "grad_norm": 0.2811615074598358, + "learning_rate": 3.957227936780476e-05, + "loss": 0.2266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11964725703001022, + "step": 265, + "valid_targets_mean": 13769.2, + "valid_targets_min": 7965 + }, + { + "epoch": 1.1443736730360934, + "grad_norm": 0.2810663280814256, + "learning_rate": 3.952769797350899e-05, + "loss": 0.2161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.109311044216156, + "step": 270, + "valid_targets_mean": 14035.2, + "valid_targets_min": 6319 + }, + { + "epoch": 1.1656050955414012, + "grad_norm": 0.2518522428935926, + "learning_rate": 3.948093460737679e-05, + "loss": 0.1904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10714008659124374, + "step": 275, + "valid_targets_mean": 15836.6, + "valid_targets_min": 7593 + }, + { + "epoch": 1.186836518046709, + "grad_norm": 0.265837889297658, + "learning_rate": 3.943199449461944e-05, + "loss": 0.2426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11197318136692047, + "step": 280, + "valid_targets_mean": 15728.6, + "valid_targets_min": 8141 + }, + { + "epoch": 1.208067940552017, + "grad_norm": 0.28084898004784403, + "learning_rate": 3.938088310367199e-05, + "loss": 0.2234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08627848327159882, + "step": 285, + "valid_targets_mean": 15427.1, + "valid_targets_min": 6842 + }, + { + "epoch": 1.2292993630573248, + "grad_norm": 0.27746934501090276, + "learning_rate": 3.932760614558218e-05, + "loss": 0.2209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12175991386175156, + "step": 290, + "valid_targets_mean": 15384.0, + "valid_targets_min": 8328 + }, + { + "epoch": 1.2505307855626326, + "grad_norm": 0.29525237911812746, + "learning_rate": 3.9272169573372345e-05, + "loss": 0.23, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10661651194095612, + "step": 295, + "valid_targets_mean": 12726.1, + "valid_targets_min": 4463 + }, + { + "epoch": 1.2717622080679405, + "grad_norm": 0.3428937157148055, + "learning_rate": 3.921457958137421e-05, + "loss": 0.2649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12638840079307556, + "step": 300, + "valid_targets_mean": 13763.4, + "valid_targets_min": 6285 + }, + { + "epoch": 1.2929936305732483, + "grad_norm": 0.3714748650604558, + "learning_rate": 3.915484260453679e-05, + "loss": 0.238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12943370640277863, + "step": 305, + "valid_targets_mean": 11842.8, + "valid_targets_min": 2224 + }, + { + "epoch": 1.3142250530785562, + "grad_norm": 0.310114967686842, + "learning_rate": 3.909296531770732e-05, + "loss": 0.2319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1138845905661583, + "step": 310, + "valid_targets_mean": 12274.8, + "valid_targets_min": 5680 + }, + { + "epoch": 1.335456475583864, + "grad_norm": 0.27182436922475456, + "learning_rate": 3.902895463488547e-05, + "loss": 0.2209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10476358979940414, + "step": 315, + "valid_targets_mean": 14856.8, + "valid_targets_min": 5796 + }, + { + "epoch": 1.356687898089172, + "grad_norm": 0.31748294870719895, + "learning_rate": 3.896281770845076e-05, + "loss": 0.2109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10224044322967529, + "step": 320, + "valid_targets_mean": 10133.5, + "valid_targets_min": 5714 + }, + { + "epoch": 1.3779193205944797, + "grad_norm": 0.3826976146252936, + "learning_rate": 3.8894561928363396e-05, + "loss": 0.2003, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11495161056518555, + "step": 325, + "valid_targets_mean": 15048.9, + "valid_targets_min": 5342 + }, + { + "epoch": 1.3991507430997876, + "grad_norm": 0.27319461570158804, + "learning_rate": 3.8824194921338516e-05, + "loss": 0.2091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08650592714548111, + "step": 330, + "valid_targets_mean": 15681.1, + "valid_targets_min": 7221 + }, + { + "epoch": 1.4203821656050954, + "grad_norm": 0.2792309770737464, + "learning_rate": 3.875172454999402e-05, + "loss": 0.2081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10807353258132935, + "step": 335, + "valid_targets_mean": 13810.6, + "valid_targets_min": 4893 + }, + { + "epoch": 1.4416135881104033, + "grad_norm": 0.3182023284508512, + "learning_rate": 3.8677158911972e-05, + "loss": 0.2073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14491504430770874, + "step": 340, + "valid_targets_mean": 12870.5, + "valid_targets_min": 2474 + }, + { + "epoch": 1.4628450106157111, + "grad_norm": 0.2729629600919183, + "learning_rate": 3.860050633903395e-05, + "loss": 0.2098, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13041123747825623, + "step": 345, + "valid_targets_mean": 14308.1, + "valid_targets_min": 7717 + }, + { + "epoch": 1.484076433121019, + "grad_norm": 0.31582590679097317, + "learning_rate": 3.8521775396129824e-05, + "loss": 0.2233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13939642906188965, + "step": 350, + "valid_targets_mean": 15401.0, + "valid_targets_min": 7556 + }, + { + "epoch": 1.5053078556263269, + "grad_norm": 0.30106861470253815, + "learning_rate": 3.8440974880440925e-05, + "loss": 0.2165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10552646219730377, + "step": 355, + "valid_targets_mean": 15419.1, + "valid_targets_min": 3604 + }, + { + "epoch": 1.5265392781316347, + "grad_norm": 0.24666022628823678, + "learning_rate": 3.835811382039703e-05, + "loss": 0.2098, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09952588379383087, + "step": 360, + "valid_targets_mean": 13430.5, + "valid_targets_min": 6392 + }, + { + "epoch": 1.5477707006369426, + "grad_norm": 0.23263039306298894, + "learning_rate": 3.827320147466752e-05, + "loss": 0.2201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10834848880767822, + "step": 365, + "valid_targets_mean": 18282.2, + "valid_targets_min": 10348 + }, + { + "epoch": 1.5690021231422504, + "grad_norm": 0.2763418285871218, + "learning_rate": 3.818624733112687e-05, + "loss": 0.2437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09543554484844208, + "step": 370, + "valid_targets_mean": 15127.2, + "valid_targets_min": 4569 + }, + { + "epoch": 1.5902335456475583, + "grad_norm": 0.2344608159731727, + "learning_rate": 3.809726110579446e-05, + "loss": 0.1932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10267725586891174, + "step": 375, + "valid_targets_mean": 18113.5, + "valid_targets_min": 5644 + }, + { + "epoch": 1.611464968152866, + "grad_norm": 0.40210264521938344, + "learning_rate": 3.8006252741748986e-05, + "loss": 0.227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11398470401763916, + "step": 380, + "valid_targets_mean": 13665.8, + "valid_targets_min": 1344 + }, + { + "epoch": 1.632696390658174, + "grad_norm": 0.30513744618147365, + "learning_rate": 3.79132324080174e-05, + "loss": 0.2166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14099963009357452, + "step": 385, + "valid_targets_mean": 14852.2, + "valid_targets_min": 3364 + }, + { + "epoch": 1.6539278131634818, + "grad_norm": 0.22426713356424843, + "learning_rate": 3.781821049843869e-05, + "loss": 0.1943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07226351648569107, + "step": 390, + "valid_targets_mean": 18799.4, + "valid_targets_min": 10477 + }, + { + "epoch": 1.6751592356687897, + "grad_norm": 0.2739515977898349, + "learning_rate": 3.7721197630502485e-05, + "loss": 0.2147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11230744421482086, + "step": 395, + "valid_targets_mean": 14162.9, + "valid_targets_min": 4448 + }, + { + "epoch": 1.6963906581740975, + "grad_norm": 0.2920300807449177, + "learning_rate": 3.762220464416266e-05, + "loss": 0.2095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1347799301147461, + "step": 400, + "valid_targets_mean": 17425.0, + "valid_targets_min": 12687 + }, + { + "epoch": 1.7176220806794054, + "grad_norm": 0.37395274633289044, + "learning_rate": 3.7521242600626154e-05, + "loss": 0.1993, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09538485109806061, + "step": 405, + "valid_targets_mean": 15063.0, + "valid_targets_min": 7612 + }, + { + "epoch": 1.7388535031847132, + "grad_norm": 0.330636894297862, + "learning_rate": 3.7418322781117e-05, + "loss": 0.2471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13609623908996582, + "step": 410, + "valid_targets_mean": 12677.8, + "valid_targets_min": 5740 + }, + { + "epoch": 1.7600849256900213, + "grad_norm": 0.28342681236643996, + "learning_rate": 3.731345668561577e-05, + "loss": 0.2065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0960487648844719, + "step": 415, + "valid_targets_mean": 12748.8, + "valid_targets_min": 7296 + }, + { + "epoch": 1.7813163481953291, + "grad_norm": 0.23683106588934166, + "learning_rate": 3.720665603157464e-05, + "loss": 0.1736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08303853869438171, + "step": 420, + "valid_targets_mean": 15981.0, + "valid_targets_min": 7057 + }, + { + "epoch": 1.802547770700637, + "grad_norm": 0.316249617780758, + "learning_rate": 3.7097932752608096e-05, + "loss": 0.219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1166648119688034, + "step": 425, + "valid_targets_mean": 16572.9, + "valid_targets_min": 11308 + }, + { + "epoch": 1.8237791932059448, + "grad_norm": 0.25450442249313526, + "learning_rate": 3.698729899715947e-05, + "loss": 0.1986, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11825818568468094, + "step": 430, + "valid_targets_mean": 16501.6, + "valid_targets_min": 5425 + }, + { + "epoch": 1.8450106157112527, + "grad_norm": 0.2562691834652498, + "learning_rate": 3.687476712714358e-05, + "loss": 0.2078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08408856391906738, + "step": 435, + "valid_targets_mean": 13867.4, + "valid_targets_min": 5665 + }, + { + "epoch": 1.8662420382165605, + "grad_norm": 0.26720306087521367, + "learning_rate": 3.676034971656537e-05, + "loss": 0.1934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1064402386546135, + "step": 440, + "valid_targets_mean": 14102.1, + "valid_targets_min": 6476 + }, + { + "epoch": 1.8874734607218684, + "grad_norm": 0.21974899342329504, + "learning_rate": 3.664405955011498e-05, + "loss": 0.1917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08773387968540192, + "step": 445, + "valid_targets_mean": 17870.5, + "valid_targets_min": 9456 + }, + { + "epoch": 1.9087048832271762, + "grad_norm": 0.30795940792172116, + "learning_rate": 3.652590962173917e-05, + "loss": 0.2353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14995551109313965, + "step": 450, + "valid_targets_mean": 15358.0, + "valid_targets_min": 8778 + }, + { + "epoch": 1.929936305732484, + "grad_norm": 0.27504189449803096, + "learning_rate": 3.640591313318944e-05, + "loss": 0.2091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10541120916604996, + "step": 455, + "valid_targets_mean": 15389.8, + "valid_targets_min": 5133 + }, + { + "epoch": 1.951167728237792, + "grad_norm": 0.29564860036710783, + "learning_rate": 3.628408349254693e-05, + "loss": 0.202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10049857199192047, + "step": 460, + "valid_targets_mean": 14645.1, + "valid_targets_min": 6715 + }, + { + "epoch": 1.9723991507430998, + "grad_norm": 0.25475277572273397, + "learning_rate": 3.616043431272417e-05, + "loss": 0.2048, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07846721261739731, + "step": 465, + "valid_targets_mean": 16528.9, + "valid_targets_min": 6680 + }, + { + "epoch": 1.9936305732484076, + "grad_norm": 0.26142683635944247, + "learning_rate": 3.603497940994407e-05, + "loss": 0.2125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09435806423425674, + "step": 470, + "valid_targets_mean": 16962.5, + "valid_targets_min": 8108 + }, + { + "epoch": 2.0127388535031847, + "grad_norm": 0.24722160377441987, + "learning_rate": 3.59077328021961e-05, + "loss": 0.1976, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08899471163749695, + "step": 475, + "valid_targets_mean": 12891.4, + "valid_targets_min": 9088 + }, + { + "epoch": 2.0339702760084926, + "grad_norm": 0.2517941803747066, + "learning_rate": 3.577870870766997e-05, + "loss": 0.2029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09349747002124786, + "step": 480, + "valid_targets_mean": 13435.2, + "valid_targets_min": 6613 + }, + { + "epoch": 2.0552016985138004, + "grad_norm": 0.2758026420024873, + "learning_rate": 3.5647921543166923e-05, + "loss": 0.1978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07516678422689438, + "step": 485, + "valid_targets_mean": 16082.9, + "valid_targets_min": 5458 + }, + { + "epoch": 2.0764331210191083, + "grad_norm": 0.24371704180908865, + "learning_rate": 3.5515385922488846e-05, + "loss": 0.1965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08999522030353546, + "step": 490, + "valid_targets_mean": 16669.6, + "valid_targets_min": 6356 + }, + { + "epoch": 2.097664543524416, + "grad_norm": 0.278212723013607, + "learning_rate": 3.5381116654805375e-05, + "loss": 0.1878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07290489971637726, + "step": 495, + "valid_targets_mean": 15383.8, + "valid_targets_min": 4535 + }, + { + "epoch": 2.118895966029724, + "grad_norm": 0.28587596752541133, + "learning_rate": 3.524512874299912e-05, + "loss": 0.1913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1112004891037941, + "step": 500, + "valid_targets_mean": 16063.8, + "valid_targets_min": 8790 + }, + { + "epoch": 2.140127388535032, + "grad_norm": 0.23694124431153454, + "learning_rate": 3.5107437381989325e-05, + "loss": 0.1988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06883738934993744, + "step": 505, + "valid_targets_mean": 15695.1, + "valid_targets_min": 5449 + }, + { + "epoch": 2.1613588110403397, + "grad_norm": 0.2572567842840769, + "learning_rate": 3.4968057957034e-05, + "loss": 0.194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09729857742786407, + "step": 510, + "valid_targets_mean": 16853.2, + "valid_targets_min": 3854 + }, + { + "epoch": 2.1825902335456475, + "grad_norm": 0.2897655571941794, + "learning_rate": 3.482700604201086e-05, + "loss": 0.1947, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10338980704545975, + "step": 515, + "valid_targets_mean": 12990.2, + "valid_targets_min": 6250 + }, + { + "epoch": 2.2038216560509554, + "grad_norm": 0.3345559098061995, + "learning_rate": 3.4684297397677064e-05, + "loss": 0.2124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11578390747308731, + "step": 520, + "valid_targets_mean": 13486.2, + "valid_targets_min": 5214 + }, + { + "epoch": 2.225053078556263, + "grad_norm": 0.2647961046337731, + "learning_rate": 3.453994796990823e-05, + "loss": 0.2005, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08278575539588928, + "step": 525, + "valid_targets_mean": 15026.2, + "valid_targets_min": 2535 + }, + { + "epoch": 2.246284501061571, + "grad_norm": 0.29321760721644347, + "learning_rate": 3.439397388791662e-05, + "loss": 0.1842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10423131287097931, + "step": 530, + "valid_targets_mean": 13677.5, + "valid_targets_min": 4331 + }, + { + "epoch": 2.267515923566879, + "grad_norm": 0.33439639128696136, + "learning_rate": 3.424639146244898e-05, + "loss": 0.2108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09143179655075073, + "step": 535, + "valid_targets_mean": 12497.9, + "valid_targets_min": 6576 + }, + { + "epoch": 2.2887473460721868, + "grad_norm": 0.31293816068157115, + "learning_rate": 3.409721718396395e-05, + "loss": 0.2073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12323853373527527, + "step": 540, + "valid_targets_mean": 15538.4, + "valid_targets_min": 9217 + }, + { + "epoch": 2.3099787685774946, + "grad_norm": 0.2932762754898537, + "learning_rate": 3.394646772078951e-05, + "loss": 0.2136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11464841663837433, + "step": 545, + "valid_targets_mean": 13407.5, + "valid_targets_min": 5234 + }, + { + "epoch": 2.3312101910828025, + "grad_norm": 0.31716101339217717, + "learning_rate": 3.379415991726047e-05, + "loss": 0.1953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0933525562286377, + "step": 550, + "valid_targets_mean": 10441.4, + "valid_targets_min": 3289 + }, + { + "epoch": 2.3524416135881103, + "grad_norm": 0.31294543987180895, + "learning_rate": 3.3640310791836375e-05, + "loss": 0.2001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11642187833786011, + "step": 555, + "valid_targets_mean": 15880.0, + "valid_targets_min": 6450 + }, + { + "epoch": 2.373673036093418, + "grad_norm": 0.23138968267395532, + "learning_rate": 3.348493753519987e-05, + "loss": 0.2171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0960564911365509, + "step": 560, + "valid_targets_mean": 18356.4, + "valid_targets_min": 13592 + }, + { + "epoch": 2.394904458598726, + "grad_norm": 0.296455985526213, + "learning_rate": 3.332805750833588e-05, + "loss": 0.1966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1138642281293869, + "step": 565, + "valid_targets_mean": 16774.5, + "valid_targets_min": 9339 + }, + { + "epoch": 2.416135881104034, + "grad_norm": 0.26969809385295945, + "learning_rate": 3.3169688240591735e-05, + "loss": 0.187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08020391315221786, + "step": 570, + "valid_targets_mean": 11902.9, + "valid_targets_min": 6702 + }, + { + "epoch": 2.4373673036093417, + "grad_norm": 0.26145366450884633, + "learning_rate": 3.300984742771849e-05, + "loss": 0.1826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08663637936115265, + "step": 575, + "valid_targets_mean": 15054.6, + "valid_targets_min": 4839 + }, + { + "epoch": 2.4585987261146496, + "grad_norm": 0.32984191621067444, + "learning_rate": 3.284855292989363e-05, + "loss": 0.2016, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10979083180427551, + "step": 580, + "valid_targets_mean": 14732.2, + "valid_targets_min": 5139 + }, + { + "epoch": 2.4798301486199574, + "grad_norm": 0.27365490330331865, + "learning_rate": 3.268582276972549e-05, + "loss": 0.1907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13272282481193542, + "step": 585, + "valid_targets_mean": 14038.0, + "valid_targets_min": 7854 + }, + { + "epoch": 2.5010615711252653, + "grad_norm": 0.25796595337009653, + "learning_rate": 3.252167513023934e-05, + "loss": 0.2006, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09469525516033173, + "step": 590, + "valid_targets_mean": 17552.2, + "valid_targets_min": 9415 + }, + { + "epoch": 2.522292993630573, + "grad_norm": 0.26440522510501036, + "learning_rate": 3.2356128352845794e-05, + "loss": 0.1982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08657258749008179, + "step": 595, + "valid_targets_mean": 15092.1, + "valid_targets_min": 8187 + }, + { + "epoch": 2.543524416135881, + "grad_norm": 0.31227039684550767, + "learning_rate": 3.218920093529129e-05, + "loss": 0.1869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11756514757871628, + "step": 600, + "valid_targets_mean": 12069.6, + "valid_targets_min": 5064 + }, + { + "epoch": 2.564755838641189, + "grad_norm": 0.27723943310578775, + "learning_rate": 3.202091152959126e-05, + "loss": 0.1757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11652103066444397, + "step": 605, + "valid_targets_mean": 15984.1, + "valid_targets_min": 5816 + }, + { + "epoch": 2.5859872611464967, + "grad_norm": 0.2780638657313319, + "learning_rate": 3.1851278939945974e-05, + "loss": 0.1775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1163756474852562, + "step": 610, + "valid_targets_mean": 15140.8, + "valid_targets_min": 9131 + }, + { + "epoch": 2.6072186836518045, + "grad_norm": 0.32200630212205833, + "learning_rate": 3.1680322120639436e-05, + "loss": 0.2035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12698647379875183, + "step": 615, + "valid_targets_mean": 13908.4, + "valid_targets_min": 3655 + }, + { + "epoch": 2.6284501061571124, + "grad_norm": 0.28026722665310455, + "learning_rate": 3.150806017392145e-05, + "loss": 0.1815, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10848917067050934, + "step": 620, + "valid_targets_mean": 15885.1, + "valid_targets_min": 8272 + }, + { + "epoch": 2.6496815286624202, + "grad_norm": 0.2987901602685072, + "learning_rate": 3.1334512347873215e-05, + "loss": 0.1946, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1020238921046257, + "step": 625, + "valid_targets_mean": 16725.5, + "valid_targets_min": 6596 + }, + { + "epoch": 2.670912951167728, + "grad_norm": 0.3422773542295122, + "learning_rate": 3.1159698034256595e-05, + "loss": 0.1946, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08330472558736801, + "step": 630, + "valid_targets_mean": 12442.0, + "valid_targets_min": 1587 + }, + { + "epoch": 2.692144373673036, + "grad_norm": 0.2815022503459199, + "learning_rate": 3.098363676634732e-05, + "loss": 0.2026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09158733487129211, + "step": 635, + "valid_targets_mean": 14677.6, + "valid_targets_min": 3623 + }, + { + "epoch": 2.713375796178344, + "grad_norm": 0.27699220688660753, + "learning_rate": 3.080634821675239e-05, + "loss": 0.1906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08198846876621246, + "step": 640, + "valid_targets_mean": 14876.6, + "valid_targets_min": 5491 + }, + { + "epoch": 2.7346072186836516, + "grad_norm": 0.27056674048902585, + "learning_rate": 3.0627852195211944e-05, + "loss": 0.1943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09581325948238373, + "step": 645, + "valid_targets_mean": 16048.8, + "valid_targets_min": 7561 + }, + { + "epoch": 2.7558386411889595, + "grad_norm": 0.25624506575317174, + "learning_rate": 3.0448168646385733e-05, + "loss": 0.1871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09879995882511139, + "step": 650, + "valid_targets_mean": 16128.4, + "valid_targets_min": 11230 + }, + { + "epoch": 2.777070063694268, + "grad_norm": 0.25670398472243816, + "learning_rate": 3.0267317647624584e-05, + "loss": 0.2121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0653584823012352, + "step": 655, + "valid_targets_mean": 14782.5, + "valid_targets_min": 3532 + }, + { + "epoch": 2.798301486199575, + "grad_norm": 0.2724083240499696, + "learning_rate": 3.0085319406727003e-05, + "loss": 0.2165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08737077564001083, + "step": 660, + "valid_targets_mean": 16403.5, + "valid_targets_min": 4331 + }, + { + "epoch": 2.8195329087048835, + "grad_norm": 0.2383604800522291, + "learning_rate": 2.9902194259681203e-05, + "loss": 0.1886, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07255426049232483, + "step": 665, + "valid_targets_mean": 15846.5, + "valid_targets_min": 7310 + }, + { + "epoch": 2.840764331210191, + "grad_norm": 0.30257307120700694, + "learning_rate": 2.9717962668392837e-05, + "loss": 0.1662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12048687040805817, + "step": 670, + "valid_targets_mean": 13953.0, + "valid_targets_min": 3689 + }, + { + "epoch": 2.861995753715499, + "grad_norm": 0.2522936277398451, + "learning_rate": 2.9532645218398608e-05, + "loss": 0.186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09158721566200256, + "step": 675, + "valid_targets_mean": 14446.5, + "valid_targets_min": 5751 + }, + { + "epoch": 2.8832271762208066, + "grad_norm": 0.2070318168754386, + "learning_rate": 2.9346262616566128e-05, + "loss": 0.1798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08559735119342804, + "step": 680, + "valid_targets_mean": 17995.4, + "valid_targets_min": 10423 + }, + { + "epoch": 2.904458598726115, + "grad_norm": 0.24087732064409983, + "learning_rate": 2.9158835688780188e-05, + "loss": 0.1856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08264927566051483, + "step": 685, + "valid_targets_mean": 13348.2, + "valid_targets_min": 6221 + }, + { + "epoch": 2.9256900212314223, + "grad_norm": 0.24300050232911719, + "learning_rate": 2.89703853776157e-05, + "loss": 0.1673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08173580467700958, + "step": 690, + "valid_targets_mean": 16869.4, + "valid_targets_min": 1387 + }, + { + "epoch": 2.9469214437367306, + "grad_norm": 0.26724986441117543, + "learning_rate": 2.878093273999765e-05, + "loss": 0.1836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08488611876964569, + "step": 695, + "valid_targets_mean": 15076.5, + "valid_targets_min": 6502 + }, + { + "epoch": 2.968152866242038, + "grad_norm": 0.2455958459356137, + "learning_rate": 2.859049894484828e-05, + "loss": 0.1885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08177411556243896, + "step": 700, + "valid_targets_mean": 14060.8, + "valid_targets_min": 3622 + }, + { + "epoch": 2.9893842887473463, + "grad_norm": 0.2715224629184729, + "learning_rate": 2.8399105270721668e-05, + "loss": 0.2006, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09079961478710175, + "step": 705, + "valid_targets_mean": 15069.6, + "valid_targets_min": 8482 + }, + { + "epoch": 3.008492569002123, + "grad_norm": 0.23573264403831284, + "learning_rate": 2.8206773103426187e-05, + "loss": 0.168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07453001290559769, + "step": 710, + "valid_targets_mean": 13914.1, + "valid_targets_min": 3623 + }, + { + "epoch": 3.029723991507431, + "grad_norm": 0.28900177745868094, + "learning_rate": 2.8013523933634875e-05, + "loss": 0.2028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07700274884700775, + "step": 715, + "valid_targets_mean": 14241.4, + "valid_targets_min": 9250 + }, + { + "epoch": 3.050955414012739, + "grad_norm": 0.2794038536966578, + "learning_rate": 2.7819379354484124e-05, + "loss": 0.1776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07138693332672119, + "step": 720, + "valid_targets_mean": 14523.6, + "valid_targets_min": 3518 + }, + { + "epoch": 3.0721868365180467, + "grad_norm": 0.3075656321601474, + "learning_rate": 2.762436105916094e-05, + "loss": 0.1852, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08331207185983658, + "step": 725, + "valid_targets_mean": 14277.2, + "valid_targets_min": 6321 + }, + { + "epoch": 3.0934182590233545, + "grad_norm": 0.268200382167701, + "learning_rate": 2.742849083847899e-05, + "loss": 0.2002, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08485275506973267, + "step": 730, + "valid_targets_mean": 15997.1, + "valid_targets_min": 7462 + }, + { + "epoch": 3.1146496815286624, + "grad_norm": 0.26063481388615084, + "learning_rate": 2.7231790578443785e-05, + "loss": 0.1666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09242895990610123, + "step": 735, + "valid_targets_mean": 16829.9, + "valid_targets_min": 4851 + }, + { + "epoch": 3.1358811040339702, + "grad_norm": 0.33301761675595387, + "learning_rate": 2.7034282257807136e-05, + "loss": 0.1877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12262482196092606, + "step": 740, + "valid_targets_mean": 14170.1, + "valid_targets_min": 7297 + }, + { + "epoch": 3.157112526539278, + "grad_norm": 0.25559284472185234, + "learning_rate": 2.683598794561138e-05, + "loss": 0.1819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.060392506420612335, + "step": 745, + "valid_targets_mean": 13418.9, + "valid_targets_min": 7538 + }, + { + "epoch": 3.178343949044586, + "grad_norm": 0.31989112975280076, + "learning_rate": 2.66369297987234e-05, + "loss": 0.1758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09339696168899536, + "step": 750, + "valid_targets_mean": 14859.2, + "valid_targets_min": 5714 + }, + { + "epoch": 3.199575371549894, + "grad_norm": 0.24047693551940477, + "learning_rate": 2.643713005935888e-05, + "loss": 0.1607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06566546857357025, + "step": 755, + "valid_targets_mean": 17566.6, + "valid_targets_min": 12773 + }, + { + "epoch": 3.2208067940552016, + "grad_norm": 0.24423971251668095, + "learning_rate": 2.6236611052597055e-05, + "loss": 0.1732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05638258159160614, + "step": 760, + "valid_targets_mean": 16585.4, + "valid_targets_min": 6836 + }, + { + "epoch": 3.2420382165605095, + "grad_norm": 0.22931906700860674, + "learning_rate": 2.603539518388611e-05, + "loss": 0.1782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.062216877937316895, + "step": 765, + "valid_targets_mean": 15363.1, + "valid_targets_min": 6726 + }, + { + "epoch": 3.2632696390658174, + "grad_norm": 0.29138046222131386, + "learning_rate": 2.5833504936539712e-05, + "loss": 0.1794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12089153379201889, + "step": 770, + "valid_targets_mean": 15873.8, + "valid_targets_min": 7979 + }, + { + "epoch": 3.284501061571125, + "grad_norm": 0.27630841743869844, + "learning_rate": 2.563096286922474e-05, + "loss": 0.1948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0977085679769516, + "step": 775, + "valid_targets_mean": 13591.0, + "valid_targets_min": 3435 + }, + { + "epoch": 3.305732484076433, + "grad_norm": 0.31406681179293716, + "learning_rate": 2.54277916134407e-05, + "loss": 0.1825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06433968245983124, + "step": 780, + "valid_targets_mean": 13319.6, + "valid_targets_min": 2535 + }, + { + "epoch": 3.326963906581741, + "grad_norm": 0.26059306614788147, + "learning_rate": 2.5224013870990868e-05, + "loss": 0.1861, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08149316906929016, + "step": 785, + "valid_targets_mean": 13039.1, + "valid_targets_min": 6808 + }, + { + "epoch": 3.3481953290870488, + "grad_norm": 0.2351953795497981, + "learning_rate": 2.5019652411445704e-05, + "loss": 0.1929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07320615649223328, + "step": 790, + "valid_targets_mean": 15469.1, + "valid_targets_min": 8955 + }, + { + "epoch": 3.3694267515923566, + "grad_norm": 0.2736683222828364, + "learning_rate": 2.4814730069598624e-05, + "loss": 0.1737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0710974708199501, + "step": 795, + "valid_targets_mean": 11276.5, + "valid_targets_min": 5920 + }, + { + "epoch": 3.3906581740976645, + "grad_norm": 0.27054094450445326, + "learning_rate": 2.460926974291451e-05, + "loss": 0.1916, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07035864889621735, + "step": 800, + "valid_targets_mean": 17484.8, + "valid_targets_min": 8262 + }, + { + "epoch": 3.4118895966029723, + "grad_norm": 0.29197318400903655, + "learning_rate": 2.440329438897122e-05, + "loss": 0.1705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09986551851034164, + "step": 805, + "valid_targets_mean": 16090.8, + "valid_targets_min": 7707 + }, + { + "epoch": 3.43312101910828, + "grad_norm": 0.2314470232042232, + "learning_rate": 2.419682702289432e-05, + "loss": 0.1584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05505535006523132, + "step": 810, + "valid_targets_mean": 16037.1, + "valid_targets_min": 9644 + }, + { + "epoch": 3.454352441613588, + "grad_norm": 0.3222253587001989, + "learning_rate": 2.3989890714785505e-05, + "loss": 0.1632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09005002677440643, + "step": 815, + "valid_targets_mean": 16947.9, + "valid_targets_min": 6163 + }, + { + "epoch": 3.475583864118896, + "grad_norm": 0.3294379601590877, + "learning_rate": 2.3782508587144774e-05, + "loss": 0.1693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11677606403827667, + "step": 820, + "valid_targets_mean": 13542.0, + "valid_targets_min": 6462 + }, + { + "epoch": 3.4968152866242037, + "grad_norm": 0.29000006498568415, + "learning_rate": 2.3574703812286766e-05, + "loss": 0.1746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07387042045593262, + "step": 825, + "valid_targets_mean": 11624.4, + "valid_targets_min": 4488 + }, + { + "epoch": 3.5180467091295116, + "grad_norm": 0.2401705211916322, + "learning_rate": 2.3366499609751593e-05, + "loss": 0.1736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09939301013946533, + "step": 830, + "valid_targets_mean": 15575.9, + "valid_targets_min": 8394 + }, + { + "epoch": 3.5392781316348194, + "grad_norm": 0.28451927113712266, + "learning_rate": 2.3157919243710318e-05, + "loss": 0.1789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09718938171863556, + "step": 835, + "valid_targets_mean": 16160.0, + "valid_targets_min": 7174 + }, + { + "epoch": 3.5605095541401273, + "grad_norm": 0.3211013643205975, + "learning_rate": 2.2948986020365493e-05, + "loss": 0.1955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09303892403841019, + "step": 840, + "valid_targets_mean": 11994.6, + "valid_targets_min": 4268 + }, + { + "epoch": 3.581740976645435, + "grad_norm": 0.2746677955508634, + "learning_rate": 2.273972328534698e-05, + "loss": 0.2052, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12424758076667786, + "step": 845, + "valid_targets_mean": 15329.9, + "valid_targets_min": 5514 + }, + { + "epoch": 3.602972399150743, + "grad_norm": 0.2916069982798815, + "learning_rate": 2.2530154421103386e-05, + "loss": 0.1627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0865262895822525, + "step": 850, + "valid_targets_mean": 12826.9, + "valid_targets_min": 1935 + }, + { + "epoch": 3.624203821656051, + "grad_norm": 0.329290804996052, + "learning_rate": 2.2320302844289366e-05, + "loss": 0.2028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1294848918914795, + "step": 855, + "valid_targets_mean": 14766.9, + "valid_targets_min": 4963 + }, + { + "epoch": 3.6454352441613587, + "grad_norm": 0.26352816238784327, + "learning_rate": 2.21101920031491e-05, + "loss": 0.1643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05863261595368385, + "step": 860, + "valid_targets_mean": 14058.9, + "valid_targets_min": 2079 + }, + { + "epoch": 3.6666666666666665, + "grad_norm": 0.27075913676748065, + "learning_rate": 2.1899845374896264e-05, + "loss": 0.1724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0771893858909607, + "step": 865, + "valid_targets_mean": 15660.5, + "valid_targets_min": 3825 + }, + { + "epoch": 3.6878980891719744, + "grad_norm": 0.2879633243259211, + "learning_rate": 2.168928646309074e-05, + "loss": 0.1844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0968056321144104, + "step": 870, + "valid_targets_mean": 13577.5, + "valid_targets_min": 1343 + }, + { + "epoch": 3.709129511677282, + "grad_norm": 0.3324325284111313, + "learning_rate": 2.14785387950124e-05, + "loss": 0.2002, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10815715789794922, + "step": 875, + "valid_targets_mean": 13251.2, + "valid_targets_min": 5776 + }, + { + "epoch": 3.73036093418259, + "grad_norm": 0.24385270873622308, + "learning_rate": 2.1267625919032233e-05, + "loss": 0.1949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07621297240257263, + "step": 880, + "valid_targets_mean": 14647.6, + "valid_targets_min": 1283 + }, + { + "epoch": 3.7515923566878984, + "grad_norm": 0.28158194480710597, + "learning_rate": 2.10565714019811e-05, + "loss": 0.1882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09309081733226776, + "step": 885, + "valid_targets_mean": 13250.6, + "valid_targets_min": 5586 + }, + { + "epoch": 3.7728237791932058, + "grad_norm": 0.28387353955537614, + "learning_rate": 2.0845398826516457e-05, + "loss": 0.1844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0900927409529686, + "step": 890, + "valid_targets_mean": 15713.0, + "valid_targets_min": 5478 + }, + { + "epoch": 3.794055201698514, + "grad_norm": 0.22676398483352012, + "learning_rate": 2.0634131788487278e-05, + "loss": 0.187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0685080736875534, + "step": 895, + "valid_targets_mean": 17496.4, + "valid_targets_min": 11864 + }, + { + "epoch": 3.8152866242038215, + "grad_norm": 0.2894684737379915, + "learning_rate": 2.0422793894297533e-05, + "loss": 0.1743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09989657998085022, + "step": 900, + "valid_targets_mean": 15670.9, + "valid_targets_min": 5314 + }, + { + "epoch": 3.8365180467091298, + "grad_norm": 0.2529641193716833, + "learning_rate": 2.0211408758268468e-05, + "loss": 0.1832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08049357682466507, + "step": 905, + "valid_targets_mean": 19922.8, + "valid_targets_min": 5124 + }, + { + "epoch": 3.857749469214437, + "grad_norm": 0.3262452116430981, + "learning_rate": 2e-05, + "loss": 0.1815, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06966280192136765, + "step": 910, + "valid_targets_mean": 10526.9, + "valid_targets_min": 3364 + }, + { + "epoch": 3.8789808917197455, + "grad_norm": 0.2935358593938512, + "learning_rate": 1.9788591241731535e-05, + "loss": 0.1781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09608276188373566, + "step": 915, + "valid_targets_mean": 13981.8, + "valid_targets_min": 8162 + }, + { + "epoch": 3.900212314225053, + "grad_norm": 0.27813488718681834, + "learning_rate": 1.9577206105702474e-05, + "loss": 0.1811, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09818491339683533, + "step": 920, + "valid_targets_mean": 17414.1, + "valid_targets_min": 7717 + }, + { + "epoch": 3.921443736730361, + "grad_norm": 0.3884374840815922, + "learning_rate": 1.9365868211512725e-05, + "loss": 0.1958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12244383990764618, + "step": 925, + "valid_targets_mean": 13225.5, + "valid_targets_min": 7774 + }, + { + "epoch": 3.9426751592356686, + "grad_norm": 0.25803403519577645, + "learning_rate": 1.915460117348355e-05, + "loss": 0.1675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0736437737941742, + "step": 930, + "valid_targets_mean": 14138.8, + "valid_targets_min": 4599 + }, + { + "epoch": 3.963906581740977, + "grad_norm": 0.2948166835249158, + "learning_rate": 1.8943428598018904e-05, + "loss": 0.1729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10551305115222931, + "step": 935, + "valid_targets_mean": 13149.0, + "valid_targets_min": 5237 + }, + { + "epoch": 3.9851380042462843, + "grad_norm": 0.26157763886832613, + "learning_rate": 1.8732374080967774e-05, + "loss": 0.1664, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08099900186061859, + "step": 940, + "valid_targets_mean": 16457.1, + "valid_targets_min": 5918 + }, + { + "epoch": 4.004246284501062, + "grad_norm": 0.22804078903359448, + "learning_rate": 1.8521461204987606e-05, + "loss": 0.1569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.044775694608688354, + "step": 945, + "valid_targets_mean": 16204.6, + "valid_targets_min": 7257 + }, + { + "epoch": 4.025477707006369, + "grad_norm": 0.2836852446635035, + "learning_rate": 1.8310713536909265e-05, + "loss": 0.1668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08778543770313263, + "step": 950, + "valid_targets_mean": 12359.9, + "valid_targets_min": 5076 + }, + { + "epoch": 4.046709129511678, + "grad_norm": 0.2442174895389702, + "learning_rate": 1.810015462510374e-05, + "loss": 0.1697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09868457168340683, + "step": 955, + "valid_targets_mean": 17049.4, + "valid_targets_min": 5491 + }, + { + "epoch": 4.067940552016985, + "grad_norm": 0.3695645197841845, + "learning_rate": 1.7889807996850906e-05, + "loss": 0.2036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11026948690414429, + "step": 960, + "valid_targets_mean": 9681.6, + "valid_targets_min": 3605 + }, + { + "epoch": 4.089171974522293, + "grad_norm": 0.29979727831410075, + "learning_rate": 1.767969715571064e-05, + "loss": 0.1686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0982229933142662, + "step": 965, + "valid_targets_mean": 11373.5, + "valid_targets_min": 2622 + }, + { + "epoch": 4.110403397027601, + "grad_norm": 0.29739837712764017, + "learning_rate": 1.746984557889662e-05, + "loss": 0.1837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08777828514575958, + "step": 970, + "valid_targets_mean": 14991.1, + "valid_targets_min": 6941 + }, + { + "epoch": 4.131634819532909, + "grad_norm": 0.27118781352159604, + "learning_rate": 1.7260276714653023e-05, + "loss": 0.1704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0875612124800682, + "step": 975, + "valid_targets_mean": 14782.4, + "valid_targets_min": 7258 + }, + { + "epoch": 4.1528662420382165, + "grad_norm": 0.28359027589369706, + "learning_rate": 1.7051013979634514e-05, + "loss": 0.1607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10205516219139099, + "step": 980, + "valid_targets_mean": 18528.0, + "valid_targets_min": 12316 + }, + { + "epoch": 4.174097664543525, + "grad_norm": 0.3340589809342197, + "learning_rate": 1.684208075628969e-05, + "loss": 0.1826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09689417481422424, + "step": 985, + "valid_targets_mean": 13361.2, + "valid_targets_min": 5776 + }, + { + "epoch": 4.195329087048832, + "grad_norm": 0.3077583048219397, + "learning_rate": 1.6633500390248414e-05, + "loss": 0.1573, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06832315027713776, + "step": 990, + "valid_targets_mean": 12119.1, + "valid_targets_min": 4323 + }, + { + "epoch": 4.2165605095541405, + "grad_norm": 0.2573294853602038, + "learning_rate": 1.642529618771324e-05, + "loss": 0.1518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05930829420685768, + "step": 995, + "valid_targets_mean": 15648.5, + "valid_targets_min": 7985 + }, + { + "epoch": 4.237791932059448, + "grad_norm": 0.2690214106625097, + "learning_rate": 1.6217491412855233e-05, + "loss": 0.1813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07982520759105682, + "step": 1000, + "valid_targets_mean": 14926.2, + "valid_targets_min": 1956 + }, + { + "epoch": 4.259023354564756, + "grad_norm": 0.3360157443708867, + "learning_rate": 1.60101092852145e-05, + "loss": 0.1753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04510154575109482, + "step": 1005, + "valid_targets_mean": 13575.0, + "valid_targets_min": 7250 + }, + { + "epoch": 4.280254777070064, + "grad_norm": 0.3205217949902666, + "learning_rate": 1.5803172977105686e-05, + "loss": 0.1579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10675063729286194, + "step": 1010, + "valid_targets_mean": 15840.2, + "valid_targets_min": 5089 + }, + { + "epoch": 4.301486199575372, + "grad_norm": 0.2583709576870678, + "learning_rate": 1.5596705611028792e-05, + "loss": 0.1607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06442227959632874, + "step": 1015, + "valid_targets_mean": 17846.0, + "valid_targets_min": 5469 + }, + { + "epoch": 4.322717622080679, + "grad_norm": 0.26010908830801727, + "learning_rate": 1.5390730257085494e-05, + "loss": 0.162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07889682799577713, + "step": 1020, + "valid_targets_mean": 16397.1, + "valid_targets_min": 12382 + }, + { + "epoch": 4.343949044585988, + "grad_norm": 0.3022177554051947, + "learning_rate": 1.5185269930401381e-05, + "loss": 0.1734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0844014585018158, + "step": 1025, + "valid_targets_mean": 11215.9, + "valid_targets_min": 4517 + }, + { + "epoch": 4.365180467091295, + "grad_norm": 0.27137051227613906, + "learning_rate": 1.4980347588554302e-05, + "loss": 0.1632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10084296762943268, + "step": 1030, + "valid_targets_mean": 17699.4, + "valid_targets_min": 5458 + }, + { + "epoch": 4.386411889596603, + "grad_norm": 0.28712761699778766, + "learning_rate": 1.4775986129009137e-05, + "loss": 0.1897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08913597464561462, + "step": 1035, + "valid_targets_mean": 15072.6, + "valid_targets_min": 2535 + }, + { + "epoch": 4.407643312101911, + "grad_norm": 0.28374049176794086, + "learning_rate": 1.4572208386559304e-05, + "loss": 0.1672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08083443343639374, + "step": 1040, + "valid_targets_mean": 15562.0, + "valid_targets_min": 6274 + }, + { + "epoch": 4.428874734607219, + "grad_norm": 0.289299652196656, + "learning_rate": 1.436903713077526e-05, + "loss": 0.1598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08103075623512268, + "step": 1045, + "valid_targets_mean": 13985.0, + "valid_targets_min": 7381 + }, + { + "epoch": 4.450106157112526, + "grad_norm": 0.29939314691306246, + "learning_rate": 1.4166495063460295e-05, + "loss": 0.1619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07846647500991821, + "step": 1050, + "valid_targets_mean": 15401.0, + "valid_targets_min": 5135 + }, + { + "epoch": 4.471337579617835, + "grad_norm": 0.322213805306961, + "learning_rate": 1.3964604816113896e-05, + "loss": 0.1806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10395080596208572, + "step": 1055, + "valid_targets_mean": 14610.8, + "valid_targets_min": 7159 + }, + { + "epoch": 4.492569002123142, + "grad_norm": 0.2866514130234711, + "learning_rate": 1.3763388947402953e-05, + "loss": 0.1715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07112490385770798, + "step": 1060, + "valid_targets_mean": 15032.0, + "valid_targets_min": 8317 + }, + { + "epoch": 4.51380042462845, + "grad_norm": 0.314478967030206, + "learning_rate": 1.3562869940641123e-05, + "loss": 0.1691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10800331830978394, + "step": 1065, + "valid_targets_mean": 15537.0, + "valid_targets_min": 5167 + }, + { + "epoch": 4.535031847133758, + "grad_norm": 0.28204747261924135, + "learning_rate": 1.3363070201276606e-05, + "loss": 0.1601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07255604863166809, + "step": 1070, + "valid_targets_mean": 14644.9, + "valid_targets_min": 4870 + }, + { + "epoch": 4.556263269639066, + "grad_norm": 0.27569273817889167, + "learning_rate": 1.316401205438862e-05, + "loss": 0.1715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06936567276716232, + "step": 1075, + "valid_targets_mean": 14241.4, + "valid_targets_min": 4601 + }, + { + "epoch": 4.5774946921443735, + "grad_norm": 0.26253469480509967, + "learning_rate": 1.2965717742192866e-05, + "loss": 0.1734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08657175302505493, + "step": 1080, + "valid_targets_mean": 15617.2, + "valid_targets_min": 5992 + }, + { + "epoch": 4.598726114649682, + "grad_norm": 0.28581659144121935, + "learning_rate": 1.276820942155622e-05, + "loss": 0.1702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08918562531471252, + "step": 1085, + "valid_targets_mean": 13600.9, + "valid_targets_min": 4402 + }, + { + "epoch": 4.619957537154989, + "grad_norm": 0.3073024926683609, + "learning_rate": 1.2571509161521007e-05, + "loss": 0.1714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07768109440803528, + "step": 1090, + "valid_targets_mean": 12548.6, + "valid_targets_min": 4815 + }, + { + "epoch": 4.6411889596602975, + "grad_norm": 0.3269547869469645, + "learning_rate": 1.2375638940839062e-05, + "loss": 0.1954, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11400610208511353, + "step": 1095, + "valid_targets_mean": 12697.8, + "valid_targets_min": 5610 + }, + { + "epoch": 4.662420382165605, + "grad_norm": 0.27326799041082284, + "learning_rate": 1.2180620645515875e-05, + "loss": 0.1619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06606325507164001, + "step": 1100, + "valid_targets_mean": 10452.8, + "valid_targets_min": 5105 + }, + { + "epoch": 4.683651804670913, + "grad_norm": 0.31708108689013287, + "learning_rate": 1.1986476066365125e-05, + "loss": 0.1794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07225104421377182, + "step": 1105, + "valid_targets_mean": 10873.4, + "valid_targets_min": 5961 + }, + { + "epoch": 4.704883227176221, + "grad_norm": 0.34556552719228606, + "learning_rate": 1.179322689657381e-05, + "loss": 0.1964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13157233595848083, + "step": 1110, + "valid_targets_mean": 16111.0, + "valid_targets_min": 5477 + }, + { + "epoch": 4.726114649681529, + "grad_norm": 0.29047699922219883, + "learning_rate": 1.1600894729278333e-05, + "loss": 0.1633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07969208061695099, + "step": 1115, + "valid_targets_mean": 12312.8, + "valid_targets_min": 7979 + }, + { + "epoch": 4.747346072186836, + "grad_norm": 0.31878635054753074, + "learning_rate": 1.1409501055151726e-05, + "loss": 0.1663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11133340746164322, + "step": 1120, + "valid_targets_mean": 16930.0, + "valid_targets_min": 4513 + }, + { + "epoch": 4.768577494692145, + "grad_norm": 0.2894979007219718, + "learning_rate": 1.1219067260002352e-05, + "loss": 0.1481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08189079165458679, + "step": 1125, + "valid_targets_mean": 13020.8, + "valid_targets_min": 5919 + }, + { + "epoch": 4.789808917197452, + "grad_norm": 0.2974224757724096, + "learning_rate": 1.1029614622384307e-05, + "loss": 0.1763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0739152580499649, + "step": 1130, + "valid_targets_mean": 15006.2, + "valid_targets_min": 6402 + }, + { + "epoch": 4.81104033970276, + "grad_norm": 0.2740806790545324, + "learning_rate": 1.0841164311219812e-05, + "loss": 0.1665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09299416840076447, + "step": 1135, + "valid_targets_mean": 18061.0, + "valid_targets_min": 10000 + }, + { + "epoch": 4.832271762208068, + "grad_norm": 0.23454734933723237, + "learning_rate": 1.0653737383433869e-05, + "loss": 0.1727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08343719691038132, + "step": 1140, + "valid_targets_mean": 17752.4, + "valid_targets_min": 7344 + }, + { + "epoch": 4.853503184713376, + "grad_norm": 0.24458323267656765, + "learning_rate": 1.0467354781601395e-05, + "loss": 0.1664, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06879070401191711, + "step": 1145, + "valid_targets_mean": 18738.1, + "valid_targets_min": 11083 + }, + { + "epoch": 4.8747346072186835, + "grad_norm": 0.26926970833953917, + "learning_rate": 1.0282037331607167e-05, + "loss": 0.165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09153994917869568, + "step": 1150, + "valid_targets_mean": 12938.4, + "valid_targets_min": 6938 + }, + { + "epoch": 4.895966029723992, + "grad_norm": 0.2475631115627024, + "learning_rate": 1.0097805740318797e-05, + "loss": 0.1613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06908365339040756, + "step": 1155, + "valid_targets_mean": 14550.2, + "valid_targets_min": 2924 + }, + { + "epoch": 4.917197452229299, + "grad_norm": 0.26381821539779826, + "learning_rate": 9.914680593273e-06, + "loss": 0.1855, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0847349539399147, + "step": 1160, + "valid_targets_mean": 16227.0, + "valid_targets_min": 2970 + }, + { + "epoch": 4.9384288747346075, + "grad_norm": 0.2536112248092482, + "learning_rate": 9.732682352375418e-06, + "loss": 0.1692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07469794154167175, + "step": 1165, + "valid_targets_mean": 16989.6, + "valid_targets_min": 1639 + }, + { + "epoch": 4.959660297239915, + "grad_norm": 0.26178169003572327, + "learning_rate": 9.551831353614272e-06, + "loss": 0.1666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0636444240808487, + "step": 1170, + "valid_targets_mean": 13780.4, + "valid_targets_min": 5417 + }, + { + "epoch": 4.980891719745223, + "grad_norm": 0.292574119747484, + "learning_rate": 9.372147804788063e-06, + "loss": 0.1838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10308250784873962, + "step": 1175, + "valid_targets_mean": 15759.5, + "valid_targets_min": 6097 + }, + { + "epoch": 5.0, + "grad_norm": 0.3633457948840684, + "learning_rate": 9.193651783247616e-06, + "loss": 0.1652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1370236575603485, + "step": 1180, + "valid_targets_mean": 13572.6, + "valid_targets_min": 8150 + }, + { + "epoch": 5.021231422505308, + "grad_norm": 0.32191020526943465, + "learning_rate": 9.016363233652686e-06, + "loss": 0.1657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10346332937479019, + "step": 1185, + "valid_targets_mean": 14068.4, + "valid_targets_min": 4870 + }, + { + "epoch": 5.042462845010616, + "grad_norm": 0.32642486899462897, + "learning_rate": 8.840301965743405e-06, + "loss": 0.1813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09915921092033386, + "step": 1190, + "valid_targets_mean": 11485.2, + "valid_targets_min": 5336 + }, + { + "epoch": 5.063694267515924, + "grad_norm": 0.28541664235799485, + "learning_rate": 8.665487652126785e-06, + "loss": 0.1678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05272082984447479, + "step": 1195, + "valid_targets_mean": 14355.5, + "valid_targets_min": 6036 + }, + { + "epoch": 5.084925690021231, + "grad_norm": 0.290919100504366, + "learning_rate": 8.491939826078552e-06, + "loss": 0.1691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08834843337535858, + "step": 1200, + "valid_targets_mean": 12843.2, + "valid_targets_min": 6392 + }, + { + "epoch": 5.10615711252654, + "grad_norm": 0.24588022878035837, + "learning_rate": 8.319677879360566e-06, + "loss": 0.1662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06265243887901306, + "step": 1205, + "valid_targets_mean": 14097.9, + "valid_targets_min": 1479 + }, + { + "epoch": 5.127388535031847, + "grad_norm": 0.26986615918502915, + "learning_rate": 8.148721060054026e-06, + "loss": 0.1576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09268582612276077, + "step": 1210, + "valid_targets_mean": 18160.4, + "valid_targets_min": 10243 + }, + { + "epoch": 5.148619957537155, + "grad_norm": 0.2900102170992239, + "learning_rate": 7.979088470408743e-06, + "loss": 0.1633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0702872946858406, + "step": 1215, + "valid_targets_mean": 13231.0, + "valid_targets_min": 6621 + }, + { + "epoch": 5.169851380042463, + "grad_norm": 0.2778789233044835, + "learning_rate": 7.81079906470872e-06, + "loss": 0.1707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08230331540107727, + "step": 1220, + "valid_targets_mean": 11612.5, + "valid_targets_min": 4938 + }, + { + "epoch": 5.191082802547771, + "grad_norm": 0.2746339546722902, + "learning_rate": 7.643871647154212e-06, + "loss": 0.1675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07877236604690552, + "step": 1225, + "valid_targets_mean": 13966.2, + "valid_targets_min": 5918 + }, + { + "epoch": 5.2123142250530785, + "grad_norm": 0.29827654767268313, + "learning_rate": 7.478324869760665e-06, + "loss": 0.1598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08680558204650879, + "step": 1230, + "valid_targets_mean": 14225.1, + "valid_targets_min": 6634 + }, + { + "epoch": 5.233545647558387, + "grad_norm": 0.3635364050170567, + "learning_rate": 7.314177230274522e-06, + "loss": 0.1498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07186665385961533, + "step": 1235, + "valid_targets_mean": 14451.9, + "valid_targets_min": 7291 + }, + { + "epoch": 5.254777070063694, + "grad_norm": 0.26514167860622967, + "learning_rate": 7.151447070106372e-06, + "loss": 0.1557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0701196938753128, + "step": 1240, + "valid_targets_mean": 13301.2, + "valid_targets_min": 3783 + }, + { + "epoch": 5.2760084925690025, + "grad_norm": 0.26324169878744724, + "learning_rate": 6.990152572281523e-06, + "loss": 0.1682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0794605165719986, + "step": 1245, + "valid_targets_mean": 16191.2, + "valid_targets_min": 8450 + }, + { + "epoch": 5.29723991507431, + "grad_norm": 0.2556220773865451, + "learning_rate": 6.830311759408275e-06, + "loss": 0.1478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0679427832365036, + "step": 1250, + "valid_targets_mean": 19563.9, + "valid_targets_min": 12610 + }, + { + "epoch": 5.318471337579618, + "grad_norm": 0.2512037430973632, + "learning_rate": 6.671942491664128e-06, + "loss": 0.1583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08314774930477142, + "step": 1255, + "valid_targets_mean": 17775.9, + "valid_targets_min": 6844 + }, + { + "epoch": 5.339702760084926, + "grad_norm": 0.301451978575514, + "learning_rate": 6.515062464800139e-06, + "loss": 0.1617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09716193377971649, + "step": 1260, + "valid_targets_mean": 13520.8, + "valid_targets_min": 6446 + }, + { + "epoch": 5.360934182590234, + "grad_norm": 0.3033561761745026, + "learning_rate": 6.359689208163635e-06, + "loss": 0.1786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09750799834728241, + "step": 1265, + "valid_targets_mean": 13563.6, + "valid_targets_min": 6054 + }, + { + "epoch": 5.382165605095541, + "grad_norm": 0.2867076697834142, + "learning_rate": 6.205840082739538e-06, + "loss": 0.1704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08606909960508347, + "step": 1270, + "valid_targets_mean": 15594.4, + "valid_targets_min": 7437 + }, + { + "epoch": 5.40339702760085, + "grad_norm": 0.2902611080814107, + "learning_rate": 6.053532279210494e-06, + "loss": 0.1819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09306415915489197, + "step": 1275, + "valid_targets_mean": 13659.0, + "valid_targets_min": 6067 + }, + { + "epoch": 5.424628450106157, + "grad_norm": 0.3138642504116254, + "learning_rate": 5.90278281603605e-06, + "loss": 0.1516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06723940372467041, + "step": 1280, + "valid_targets_mean": 12938.2, + "valid_targets_min": 7569 + }, + { + "epoch": 5.445859872611465, + "grad_norm": 0.37944831844559856, + "learning_rate": 5.753608537551023e-06, + "loss": 0.1751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08615720272064209, + "step": 1285, + "valid_targets_mean": 20076.0, + "valid_targets_min": 8700 + }, + { + "epoch": 5.467091295116773, + "grad_norm": 0.30159242882323506, + "learning_rate": 5.606026112083383e-06, + "loss": 0.172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09909145534038544, + "step": 1290, + "valid_targets_mean": 16584.8, + "valid_targets_min": 7272 + }, + { + "epoch": 5.488322717622081, + "grad_norm": 0.29081583680889445, + "learning_rate": 5.460052030091782e-06, + "loss": 0.1669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0889005959033966, + "step": 1295, + "valid_targets_mean": 17472.0, + "valid_targets_min": 8288 + }, + { + "epoch": 5.509554140127388, + "grad_norm": 0.2607145273144808, + "learning_rate": 5.315702602322943e-06, + "loss": 0.159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0712738037109375, + "step": 1300, + "valid_targets_mean": 14735.1, + "valid_targets_min": 7641 + }, + { + "epoch": 5.530785562632697, + "grad_norm": 0.29501459858586015, + "learning_rate": 5.1729939579891476e-06, + "loss": 0.162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07912551611661911, + "step": 1305, + "valid_targets_mean": 13685.9, + "valid_targets_min": 7765 + }, + { + "epoch": 5.552016985138004, + "grad_norm": 0.27509948934796713, + "learning_rate": 5.031942042966e-06, + "loss": 0.1647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08225835859775543, + "step": 1310, + "valid_targets_mean": 16989.0, + "valid_targets_min": 8083 + }, + { + "epoch": 5.573248407643312, + "grad_norm": 0.22169708987849615, + "learning_rate": 4.892562618010684e-06, + "loss": 0.1361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04339899122714996, + "step": 1315, + "valid_targets_mean": 17842.2, + "valid_targets_min": 8176 + }, + { + "epoch": 5.59447983014862, + "grad_norm": 0.3559664931393158, + "learning_rate": 4.754871257000888e-06, + "loss": 0.1583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11029928922653198, + "step": 1320, + "valid_targets_mean": 14398.4, + "valid_targets_min": 8162 + }, + { + "epoch": 5.615711252653928, + "grad_norm": 0.2679685791920965, + "learning_rate": 4.618883345194627e-06, + "loss": 0.1579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09200026094913483, + "step": 1325, + "valid_targets_mean": 17098.9, + "valid_targets_min": 7659 + }, + { + "epoch": 5.6369426751592355, + "grad_norm": 0.3057411211787925, + "learning_rate": 4.484614077511153e-06, + "loss": 0.1633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08388785272836685, + "step": 1330, + "valid_targets_mean": 13349.4, + "valid_targets_min": 7026 + }, + { + "epoch": 5.658174097664544, + "grad_norm": 0.2888169299715042, + "learning_rate": 4.352078456833082e-06, + "loss": 0.1719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10811308026313782, + "step": 1335, + "valid_targets_mean": 15849.9, + "valid_targets_min": 5320 + }, + { + "epoch": 5.679405520169851, + "grad_norm": 0.28951569017216344, + "learning_rate": 4.221291292330036e-06, + "loss": 0.1775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1030922383069992, + "step": 1340, + "valid_targets_mean": 15074.6, + "valid_targets_min": 9428 + }, + { + "epoch": 5.7006369426751595, + "grad_norm": 0.27915439610100806, + "learning_rate": 4.0922671978039055e-06, + "loss": 0.1676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06682918220758438, + "step": 1345, + "valid_targets_mean": 13675.4, + "valid_targets_min": 5223 + }, + { + "epoch": 5.721868365180467, + "grad_norm": 0.2747466866708511, + "learning_rate": 3.965020590055934e-06, + "loss": 0.1975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10387305170297623, + "step": 1350, + "valid_targets_mean": 16905.1, + "valid_targets_min": 10448 + }, + { + "epoch": 5.743099787685775, + "grad_norm": 0.39319047182394823, + "learning_rate": 3.839565687275835e-06, + "loss": 0.1555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10636863112449646, + "step": 1355, + "valid_targets_mean": 10298.1, + "valid_targets_min": 5818 + }, + { + "epoch": 5.764331210191083, + "grad_norm": 0.2508928317334097, + "learning_rate": 3.715916507453079e-06, + "loss": 0.1423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06272110342979431, + "step": 1360, + "valid_targets_mean": 14815.4, + "valid_targets_min": 3317 + }, + { + "epoch": 5.785562632696391, + "grad_norm": 0.2958681304685539, + "learning_rate": 3.5940868668105644e-06, + "loss": 0.1408, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07483332604169846, + "step": 1365, + "valid_targets_mean": 11648.4, + "valid_targets_min": 5762 + }, + { + "epoch": 5.806794055201698, + "grad_norm": 0.30333009062567456, + "learning_rate": 3.4740903782608416e-06, + "loss": 0.1697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08001961559057236, + "step": 1370, + "valid_targets_mean": 15420.4, + "valid_targets_min": 5495 + }, + { + "epoch": 5.828025477707007, + "grad_norm": 0.27437345080245856, + "learning_rate": 3.3559404498850245e-06, + "loss": 0.1836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08367906510829926, + "step": 1375, + "valid_targets_mean": 17487.1, + "valid_targets_min": 10947 + }, + { + "epoch": 5.849256900212314, + "grad_norm": 0.33157275873211317, + "learning_rate": 3.2396502834346277e-06, + "loss": 0.158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10356227308511734, + "step": 1380, + "valid_targets_mean": 13732.8, + "valid_targets_min": 5999 + }, + { + "epoch": 5.870488322717622, + "grad_norm": 0.25499987617159037, + "learning_rate": 3.1252328728564206e-06, + "loss": 0.1609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06108412891626358, + "step": 1385, + "valid_targets_mean": 13659.1, + "valid_targets_min": 6581 + }, + { + "epoch": 5.89171974522293, + "grad_norm": 0.2754456032364648, + "learning_rate": 3.0127010028405303e-06, + "loss": 0.1604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07066244632005692, + "step": 1390, + "valid_targets_mean": 16501.9, + "valid_targets_min": 7600 + }, + { + "epoch": 5.912951167728238, + "grad_norm": 0.3154111494075387, + "learning_rate": 2.9020672473919107e-06, + "loss": 0.1683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11153072118759155, + "step": 1395, + "valid_targets_mean": 12126.4, + "valid_targets_min": 1587 + }, + { + "epoch": 5.934182590233545, + "grad_norm": 0.28035453553485096, + "learning_rate": 2.7933439684253616e-06, + "loss": 0.1706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06870816648006439, + "step": 1400, + "valid_targets_mean": 13538.0, + "valid_targets_min": 6607 + }, + { + "epoch": 5.955414012738854, + "grad_norm": 0.3259723507914851, + "learning_rate": 2.6865433143842356e-06, + "loss": 0.1693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10865054279565811, + "step": 1405, + "valid_targets_mean": 12518.4, + "valid_targets_min": 4916 + }, + { + "epoch": 5.976645435244161, + "grad_norm": 0.334412099283546, + "learning_rate": 2.5816772188830098e-06, + "loss": 0.165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08065322041511536, + "step": 1410, + "valid_targets_mean": 12634.0, + "valid_targets_min": 6610 + }, + { + "epoch": 5.997876857749469, + "grad_norm": 0.2640023033609606, + "learning_rate": 2.4787573993738524e-06, + "loss": 0.1631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.048118408769369125, + "step": 1415, + "valid_targets_mean": 10203.4, + "valid_targets_min": 1607 + }, + { + "epoch": 6.016985138004246, + "grad_norm": 0.2714448832343656, + "learning_rate": 2.377795355837349e-06, + "loss": 0.1523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07569775730371475, + "step": 1420, + "valid_targets_mean": 13937.0, + "valid_targets_min": 4540 + }, + { + "epoch": 6.038216560509555, + "grad_norm": 0.3472240836287177, + "learning_rate": 2.2788023694975236e-06, + "loss": 0.1663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09323162585496902, + "step": 1425, + "valid_targets_mean": 15866.6, + "valid_targets_min": 3141 + }, + { + "epoch": 6.059447983014862, + "grad_norm": 0.2350240326376912, + "learning_rate": 2.1817895015613134e-06, + "loss": 0.1575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07922834157943726, + "step": 1430, + "valid_targets_mean": 18886.6, + "valid_targets_min": 9895 + }, + { + "epoch": 6.08067940552017, + "grad_norm": 0.36818142115866936, + "learning_rate": 2.086767591982608e-06, + "loss": 0.1529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07837212830781937, + "step": 1435, + "valid_targets_mean": 12160.6, + "valid_targets_min": 3368 + }, + { + "epoch": 6.101910828025478, + "grad_norm": 0.2856655219402555, + "learning_rate": 1.9937472582510243e-06, + "loss": 0.1684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09155093133449554, + "step": 1440, + "valid_targets_mean": 15443.0, + "valid_targets_min": 5076 + }, + { + "epoch": 6.123142250530786, + "grad_norm": 0.2547340160025767, + "learning_rate": 1.902738894205547e-06, + "loss": 0.1554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.059237219393253326, + "step": 1445, + "valid_targets_mean": 14534.0, + "valid_targets_min": 5035 + }, + { + "epoch": 6.144373673036093, + "grad_norm": 0.3002693947204139, + "learning_rate": 1.8137526688731365e-06, + "loss": 0.1596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0889972448348999, + "step": 1450, + "valid_targets_mean": 16488.1, + "valid_targets_min": 8118 + }, + { + "epoch": 6.165605095541402, + "grad_norm": 0.298982073220577, + "learning_rate": 1.7267985253324803e-06, + "loss": 0.1534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07069272547960281, + "step": 1455, + "valid_targets_mean": 15787.2, + "valid_targets_min": 6470 + }, + { + "epoch": 6.186836518046709, + "grad_norm": 0.32579161912478005, + "learning_rate": 1.641886179602974e-06, + "loss": 0.1738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07076440751552582, + "step": 1460, + "valid_targets_mean": 14006.6, + "valid_targets_min": 1307 + }, + { + "epoch": 6.208067940552017, + "grad_norm": 0.3669756453303981, + "learning_rate": 1.5590251195590811e-06, + "loss": 0.1723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06793823093175888, + "step": 1465, + "valid_targets_mean": 15177.5, + "valid_targets_min": 2535 + }, + { + "epoch": 6.229299363057325, + "grad_norm": 0.2450071301139486, + "learning_rate": 1.4782246038701865e-06, + "loss": 0.1708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06423592567443848, + "step": 1470, + "valid_targets_mean": 17624.6, + "valid_targets_min": 9107 + }, + { + "epoch": 6.250530785562633, + "grad_norm": 0.29353402854137134, + "learning_rate": 1.3994936609660493e-06, + "loss": 0.1735, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06590164452791214, + "step": 1475, + "valid_targets_mean": 11156.4, + "valid_targets_min": 5149 + }, + { + "epoch": 6.2717622080679405, + "grad_norm": 0.3270038830460443, + "learning_rate": 1.3228410880280084e-06, + "loss": 0.1719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09932979941368103, + "step": 1480, + "valid_targets_mean": 13644.0, + "valid_targets_min": 6297 + }, + { + "epoch": 6.292993630573249, + "grad_norm": 0.2789895464186324, + "learning_rate": 1.248275450005987e-06, + "loss": 0.158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08671200275421143, + "step": 1485, + "valid_targets_mean": 17008.0, + "valid_targets_min": 6585 + }, + { + "epoch": 6.314225053078556, + "grad_norm": 0.2922442549795321, + "learning_rate": 1.1758050786614872e-06, + "loss": 0.1674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08488278090953827, + "step": 1490, + "valid_targets_mean": 15370.1, + "valid_targets_min": 8217 + }, + { + "epoch": 6.3354564755838645, + "grad_norm": 0.2582229867861875, + "learning_rate": 1.1054380716366064e-06, + "loss": 0.1698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06779177486896515, + "step": 1495, + "valid_targets_mean": 14717.9, + "valid_targets_min": 5041 + }, + { + "epoch": 6.356687898089172, + "grad_norm": 0.2331255850760353, + "learning_rate": 1.0371822915492414e-06, + "loss": 0.1568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07795362919569016, + "step": 1500, + "valid_targets_mean": 16383.0, + "valid_targets_min": 8819 + }, + { + "epoch": 6.37791932059448, + "grad_norm": 0.2892889195801475, + "learning_rate": 9.710453651145335e-07, + "loss": 0.1634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07211548089981079, + "step": 1505, + "valid_targets_mean": 13909.4, + "valid_targets_min": 634 + }, + { + "epoch": 6.399150743099788, + "grad_norm": 0.26060830596236895, + "learning_rate": 9.070346822926846e-07, + "loss": 0.1658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09147711843252182, + "step": 1510, + "valid_targets_mean": 16547.1, + "valid_targets_min": 8024 + }, + { + "epoch": 6.420382165605096, + "grad_norm": 0.26764389867523897, + "learning_rate": 8.451573954632186e-07, + "loss": 0.1619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08518597483634949, + "step": 1515, + "valid_targets_mean": 16291.4, + "valid_targets_min": 3623 + }, + { + "epoch": 6.441613588110403, + "grad_norm": 0.30728258811352377, + "learning_rate": 7.854204186257952e-07, + "loss": 0.1543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1069483608007431, + "step": 1520, + "valid_targets_mean": 14551.8, + "valid_targets_min": 7637 + }, + { + "epoch": 6.462845010615712, + "grad_norm": 0.27808926869463985, + "learning_rate": 7.278304266276625e-07, + "loss": 0.1555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06044379994273186, + "step": 1525, + "valid_targets_mean": 13939.4, + "valid_targets_min": 5659 + }, + { + "epoch": 6.484076433121019, + "grad_norm": 0.23829230423437128, + "learning_rate": 6.723938544178232e-07, + "loss": 0.1524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06626199930906296, + "step": 1530, + "valid_targets_mean": 18216.4, + "valid_targets_min": 6292 + }, + { + "epoch": 6.505307855626327, + "grad_norm": 0.28068157891206974, + "learning_rate": 6.191168963280136e-07, + "loss": 0.1545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07529743015766144, + "step": 1535, + "valid_targets_mean": 13190.6, + "valid_targets_min": 5603 + }, + { + "epoch": 6.526539278131635, + "grad_norm": 0.26745162960671476, + "learning_rate": 5.680055053805622e-07, + "loss": 0.1439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0684957280755043, + "step": 1540, + "valid_targets_mean": 15671.4, + "valid_targets_min": 9088 + }, + { + "epoch": 6.547770700636943, + "grad_norm": 0.3186094467001062, + "learning_rate": 5.190653926232169e-07, + "loss": 0.1787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09460516273975372, + "step": 1545, + "valid_targets_mean": 13747.5, + "valid_targets_min": 6715 + }, + { + "epoch": 6.56900212314225, + "grad_norm": 0.2830669692378638, + "learning_rate": 4.723020264910139e-07, + "loss": 0.1493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10172918438911438, + "step": 1550, + "valid_targets_mean": 19980.6, + "valid_targets_min": 13214 + }, + { + "epoch": 6.590233545647559, + "grad_norm": 0.3388742629588612, + "learning_rate": 4.2772063219523875e-07, + "loss": 0.1871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06810663640499115, + "step": 1555, + "valid_targets_mean": 11326.8, + "valid_targets_min": 1387 + }, + { + "epoch": 6.611464968152866, + "grad_norm": 0.25098732861510203, + "learning_rate": 3.853261911395834e-07, + "loss": 0.162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08643859624862671, + "step": 1560, + "valid_targets_mean": 17087.6, + "valid_targets_min": 4205 + }, + { + "epoch": 6.632696390658174, + "grad_norm": 0.25308441903042767, + "learning_rate": 3.4512344036353727e-07, + "loss": 0.1771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0752149447798729, + "step": 1565, + "valid_targets_mean": 14135.8, + "valid_targets_min": 6446 + }, + { + "epoch": 6.653927813163482, + "grad_norm": 0.27008472046636767, + "learning_rate": 3.071168720130779e-07, + "loss": 0.1496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.055712342262268066, + "step": 1570, + "valid_targets_mean": 10874.0, + "valid_targets_min": 1381 + }, + { + "epoch": 6.67515923566879, + "grad_norm": 0.25724134253077174, + "learning_rate": 2.7131073283873654e-07, + "loss": 0.1573, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.059917815029621124, + "step": 1575, + "valid_targets_mean": 14455.5, + "valid_targets_min": 6049 + }, + { + "epoch": 6.6963906581740975, + "grad_norm": 0.27550494093570005, + "learning_rate": 2.3770902372107772e-07, + "loss": 0.1609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0692015290260315, + "step": 1580, + "valid_targets_mean": 13820.6, + "valid_targets_min": 4902 + }, + { + "epoch": 6.717622080679406, + "grad_norm": 0.2523704810311892, + "learning_rate": 2.0631549922364824e-07, + "loss": 0.1427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06639319658279419, + "step": 1585, + "valid_targets_mean": 11344.8, + "valid_targets_min": 5619 + }, + { + "epoch": 6.738853503184713, + "grad_norm": 0.29993361280228925, + "learning_rate": 1.7713366717344803e-07, + "loss": 0.1706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07957091927528381, + "step": 1590, + "valid_targets_mean": 9983.5, + "valid_targets_min": 4996 + }, + { + "epoch": 6.7600849256900215, + "grad_norm": 0.2502043937813371, + "learning_rate": 1.5016678826899055e-07, + "loss": 0.1495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07833079993724823, + "step": 1595, + "valid_targets_mean": 20954.8, + "valid_targets_min": 14257 + }, + { + "epoch": 6.781316348195329, + "grad_norm": 0.25034032952355, + "learning_rate": 1.2541787571594522e-07, + "loss": 0.1599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07827061414718628, + "step": 1600, + "valid_targets_mean": 15005.9, + "valid_targets_min": 9166 + }, + { + "epoch": 6.802547770700637, + "grad_norm": 0.22039819025048707, + "learning_rate": 1.0288969489046008e-07, + "loss": 0.1417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.048642657697200775, + "step": 1605, + "valid_targets_mean": 19840.2, + "valid_targets_min": 11252 + }, + { + "epoch": 6.823779193205945, + "grad_norm": 0.2588836654976448, + "learning_rate": 8.258476303016017e-08, + "loss": 0.148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07373689115047455, + "step": 1610, + "valid_targets_mean": 14245.9, + "valid_targets_min": 8271 + }, + { + "epoch": 6.845010615711253, + "grad_norm": 0.2926967992400304, + "learning_rate": 6.45053489528813e-08, + "loss": 0.1707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08257532864809036, + "step": 1615, + "valid_targets_mean": 17754.5, + "valid_targets_min": 10599 + }, + { + "epoch": 6.86624203821656, + "grad_norm": 0.2838687369452468, + "learning_rate": 4.8653472803159576e-08, + "loss": 0.1737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0895313173532486, + "step": 1620, + "valid_targets_mean": 16994.5, + "valid_targets_min": 8175 + }, + { + "epoch": 6.887473460721869, + "grad_norm": 0.2750729053185284, + "learning_rate": 3.503090582650081e-08, + "loss": 0.1656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07482630014419556, + "step": 1625, + "valid_targets_mean": 16871.8, + "valid_targets_min": 5469 + }, + { + "epoch": 6.908704883227176, + "grad_norm": 0.3270451602832223, + "learning_rate": 2.3639170171474434e-08, + "loss": 0.1578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08098286390304565, + "step": 1630, + "valid_targets_mean": 13039.2, + "valid_targets_min": 4344 + }, + { + "epoch": 6.929936305732484, + "grad_norm": 0.3057446527787021, + "learning_rate": 1.4479538719622822e-08, + "loss": 0.1607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07232065498828888, + "step": 1635, + "valid_targets_mean": 12158.9, + "valid_targets_min": 5313 + }, + { + "epoch": 6.951167728237792, + "grad_norm": 0.3010327503890946, + "learning_rate": 7.553034943243998e-09, + "loss": 0.174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10355747491121292, + "step": 1640, + "valid_targets_mean": 16122.8, + "valid_targets_min": 7424 + }, + { + "epoch": 6.9723991507431, + "grad_norm": 0.296512581796535, + "learning_rate": 2.8604327910186634e-09, + "loss": 0.1679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.055927574634552, + "step": 1645, + "valid_targets_mean": 14152.6, + "valid_targets_min": 3364 + }, + { + "epoch": 6.993630573248407, + "grad_norm": 0.29446698895863765, + "learning_rate": 4.02256601546025e-10, + "loss": 0.1742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0790117084980011, + "step": 1650, + "valid_targets_mean": 12653.6, + "valid_targets_min": 5488 + }, + { + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17071497440338135, + "step": 1652, + "total_flos": 1.3627384946606735e+18, + "train_loss": 0.19606392417490914, + "train_runtime": 62933.2387, + "train_samples_per_second": 0.419, + "train_steps_per_second": 0.026, + "valid_targets_mean": 16957.1, + "valid_targets_min": 7571 + } + ], + "logging_steps": 5, + "max_steps": 1652, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.3627384946606735e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}