diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,9683 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 4382, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.007987220447284345, + "grad_norm": 16.214843166507823, + "learning_rate": 3.644646924829157e-07, + "loss": 0.6667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6293683052062988, + "step": 5, + "valid_targets_mean": 5840.6, + "valid_targets_min": 1683 + }, + { + "epoch": 0.01597444089456869, + "grad_norm": 16.536060280334816, + "learning_rate": 8.200455580865605e-07, + "loss": 0.7237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7005735635757446, + "step": 10, + "valid_targets_mean": 4628.3, + "valid_targets_min": 851 + }, + { + "epoch": 0.023961661341853034, + "grad_norm": 15.852663272157008, + "learning_rate": 1.2756264236902052e-06, + "loss": 0.711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.665267825126648, + "step": 15, + "valid_targets_mean": 5435.8, + "valid_targets_min": 3190 + }, + { + "epoch": 0.03194888178913738, + "grad_norm": 10.711815863929866, + "learning_rate": 1.7312072892938498e-06, + "loss": 0.6221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5724351406097412, + "step": 20, + "valid_targets_mean": 4856.0, + "valid_targets_min": 924 + }, + { + "epoch": 0.039936102236421724, + "grad_norm": 7.345110084184065, + "learning_rate": 2.1867881548974945e-06, + "loss": 0.5904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.49307093024253845, + "step": 25, + "valid_targets_mean": 4609.2, + "valid_targets_min": 566 + }, + { + "epoch": 0.04792332268370607, + "grad_norm": 4.330174769661457, + "learning_rate": 2.642369020501139e-06, + "loss": 0.5453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47903314232826233, + "step": 30, + "valid_targets_mean": 5747.5, + "valid_targets_min": 1355 + }, + { + "epoch": 0.05591054313099041, + "grad_norm": 2.4085938196558376, + "learning_rate": 3.0979498861047843e-06, + "loss": 0.4956, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4651956558227539, + "step": 35, + "valid_targets_mean": 5118.4, + "valid_targets_min": 800 + }, + { + "epoch": 0.06389776357827476, + "grad_norm": 1.516310546910285, + "learning_rate": 3.5535307517084285e-06, + "loss": 0.4872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4683207869529724, + "step": 40, + "valid_targets_mean": 4522.1, + "valid_targets_min": 838 + }, + { + "epoch": 0.07188498402555911, + "grad_norm": 1.0901568008604194, + "learning_rate": 4.009111617312073e-06, + "loss": 0.434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4305180311203003, + "step": 45, + "valid_targets_mean": 5100.6, + "valid_targets_min": 829 + }, + { + "epoch": 0.07987220447284345, + "grad_norm": 1.0694363953729904, + "learning_rate": 4.464692482915718e-06, + "loss": 0.4373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4268118143081665, + "step": 50, + "valid_targets_mean": 3879.9, + "valid_targets_min": 590 + }, + { + "epoch": 0.0878594249201278, + "grad_norm": 0.9467308768467405, + "learning_rate": 4.920273348519363e-06, + "loss": 0.4271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.475871741771698, + "step": 55, + "valid_targets_mean": 3926.7, + "valid_targets_min": 2067 + }, + { + "epoch": 0.09584664536741214, + "grad_norm": 0.7979303257083482, + "learning_rate": 5.375854214123008e-06, + "loss": 0.4191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4331532418727875, + "step": 60, + "valid_targets_mean": 5110.4, + "valid_targets_min": 886 + }, + { + "epoch": 0.10383386581469649, + "grad_norm": 0.5833315958168446, + "learning_rate": 5.831435079726651e-06, + "loss": 0.3782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33264434337615967, + "step": 65, + "valid_targets_mean": 5157.1, + "valid_targets_min": 920 + }, + { + "epoch": 0.11182108626198083, + "grad_norm": 0.534341175313362, + "learning_rate": 6.287015945330297e-06, + "loss": 0.3964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36213618516921997, + "step": 70, + "valid_targets_mean": 5796.7, + "valid_targets_min": 1392 + }, + { + "epoch": 0.11980830670926518, + "grad_norm": 0.6474280808218216, + "learning_rate": 6.742596810933942e-06, + "loss": 0.4077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4014986455440521, + "step": 75, + "valid_targets_mean": 4715.4, + "valid_targets_min": 1523 + }, + { + "epoch": 0.12779552715654952, + "grad_norm": 0.6245790708527251, + "learning_rate": 7.1981776765375854e-06, + "loss": 0.371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36042672395706177, + "step": 80, + "valid_targets_mean": 4445.1, + "valid_targets_min": 1210 + }, + { + "epoch": 0.13578274760383385, + "grad_norm": 0.5352596369045343, + "learning_rate": 7.65375854214123e-06, + "loss": 0.3556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36412060260772705, + "step": 85, + "valid_targets_mean": 5408.6, + "valid_targets_min": 1124 + }, + { + "epoch": 0.14376996805111822, + "grad_norm": 0.522019132641173, + "learning_rate": 8.109339407744875e-06, + "loss": 0.3712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3510782718658447, + "step": 90, + "valid_targets_mean": 5306.1, + "valid_targets_min": 1987 + }, + { + "epoch": 0.15175718849840256, + "grad_norm": 0.5834541923968949, + "learning_rate": 8.564920273348521e-06, + "loss": 0.3543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37941974401474, + "step": 95, + "valid_targets_mean": 4713.2, + "valid_targets_min": 857 + }, + { + "epoch": 0.1597444089456869, + "grad_norm": 0.5977305337467345, + "learning_rate": 9.020501138952164e-06, + "loss": 0.3535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34058240056037903, + "step": 100, + "valid_targets_mean": 4175.4, + "valid_targets_min": 901 + }, + { + "epoch": 0.16773162939297126, + "grad_norm": 0.5777396640406296, + "learning_rate": 9.47608200455581e-06, + "loss": 0.3411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37314867973327637, + "step": 105, + "valid_targets_mean": 4329.9, + "valid_targets_min": 320 + }, + { + "epoch": 0.1757188498402556, + "grad_norm": 0.5383539023704225, + "learning_rate": 9.931662870159453e-06, + "loss": 0.357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3683736324310303, + "step": 110, + "valid_targets_mean": 5286.6, + "valid_targets_min": 1453 + }, + { + "epoch": 0.18370607028753994, + "grad_norm": 0.7872582146315561, + "learning_rate": 1.03872437357631e-05, + "loss": 0.326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30175650119781494, + "step": 115, + "valid_targets_mean": 4889.8, + "valid_targets_min": 1692 + }, + { + "epoch": 0.19169329073482427, + "grad_norm": 0.598028210832286, + "learning_rate": 1.0842824601366744e-05, + "loss": 0.3108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29703736305236816, + "step": 120, + "valid_targets_mean": 4932.6, + "valid_targets_min": 657 + }, + { + "epoch": 0.19968051118210864, + "grad_norm": 0.5924147320645817, + "learning_rate": 1.1298405466970387e-05, + "loss": 0.338, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30937889218330383, + "step": 125, + "valid_targets_mean": 5179.6, + "valid_targets_min": 1355 + }, + { + "epoch": 0.20766773162939298, + "grad_norm": 0.5681641346639285, + "learning_rate": 1.1753986332574032e-05, + "loss": 0.3357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34611114859580994, + "step": 130, + "valid_targets_mean": 5334.6, + "valid_targets_min": 1313 + }, + { + "epoch": 0.21565495207667731, + "grad_norm": 0.5082080515535616, + "learning_rate": 1.2209567198177677e-05, + "loss": 0.3366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3541598618030548, + "step": 135, + "valid_targets_mean": 5861.4, + "valid_targets_min": 2080 + }, + { + "epoch": 0.22364217252396165, + "grad_norm": 0.5598608784555669, + "learning_rate": 1.2665148063781323e-05, + "loss": 0.2984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31298428773880005, + "step": 140, + "valid_targets_mean": 4757.7, + "valid_targets_min": 2006 + }, + { + "epoch": 0.23162939297124602, + "grad_norm": 0.5517176609096509, + "learning_rate": 1.3120728929384968e-05, + "loss": 0.3104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28146520256996155, + "step": 145, + "valid_targets_mean": 4815.2, + "valid_targets_min": 714 + }, + { + "epoch": 0.23961661341853036, + "grad_norm": 0.6508519385951486, + "learning_rate": 1.357630979498861e-05, + "loss": 0.3298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34707149863243103, + "step": 150, + "valid_targets_mean": 3598.0, + "valid_targets_min": 735 + }, + { + "epoch": 0.2476038338658147, + "grad_norm": 0.5496388523379975, + "learning_rate": 1.4031890660592255e-05, + "loss": 0.305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29744917154312134, + "step": 155, + "valid_targets_mean": 4697.5, + "valid_targets_min": 933 + }, + { + "epoch": 0.25559105431309903, + "grad_norm": 0.4789728949464768, + "learning_rate": 1.4487471526195902e-05, + "loss": 0.3128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30035269260406494, + "step": 160, + "valid_targets_mean": 5251.2, + "valid_targets_min": 1848 + }, + { + "epoch": 0.26357827476038337, + "grad_norm": 0.6371209181942088, + "learning_rate": 1.4943052391799546e-05, + "loss": 0.3018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3188396692276001, + "step": 165, + "valid_targets_mean": 4774.2, + "valid_targets_min": 363 + }, + { + "epoch": 0.2715654952076677, + "grad_norm": 0.5513751212341264, + "learning_rate": 1.539863325740319e-05, + "loss": 0.2944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27828067541122437, + "step": 170, + "valid_targets_mean": 4899.8, + "valid_targets_min": 1382 + }, + { + "epoch": 0.2795527156549521, + "grad_norm": 0.47809556821894617, + "learning_rate": 1.5854214123006836e-05, + "loss": 0.3152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33486631512641907, + "step": 175, + "valid_targets_mean": 5406.3, + "valid_targets_min": 546 + }, + { + "epoch": 0.28753993610223644, + "grad_norm": 0.5552245835273288, + "learning_rate": 1.630979498861048e-05, + "loss": 0.29, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2863185405731201, + "step": 180, + "valid_targets_mean": 3716.8, + "valid_targets_min": 813 + }, + { + "epoch": 0.2955271565495208, + "grad_norm": 0.7370634203226984, + "learning_rate": 1.6765375854214125e-05, + "loss": 0.2889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30801016092300415, + "step": 185, + "valid_targets_mean": 4590.7, + "valid_targets_min": 596 + }, + { + "epoch": 0.3035143769968051, + "grad_norm": 0.5009946980417955, + "learning_rate": 1.722095671981777e-05, + "loss": 0.3077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2993805706501007, + "step": 190, + "valid_targets_mean": 5422.0, + "valid_targets_min": 288 + }, + { + "epoch": 0.31150159744408945, + "grad_norm": 0.5358598801386117, + "learning_rate": 1.7676537585421415e-05, + "loss": 0.2763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29835355281829834, + "step": 195, + "valid_targets_mean": 5053.3, + "valid_targets_min": 924 + }, + { + "epoch": 0.3194888178913738, + "grad_norm": 1.1064965236867108, + "learning_rate": 1.813211845102506e-05, + "loss": 0.2933, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28204622864723206, + "step": 200, + "valid_targets_mean": 4445.6, + "valid_targets_min": 329 + }, + { + "epoch": 0.3274760383386581, + "grad_norm": 0.6238934475229897, + "learning_rate": 1.8587699316628704e-05, + "loss": 0.3052, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28925684094429016, + "step": 205, + "valid_targets_mean": 4103.9, + "valid_targets_min": 737 + }, + { + "epoch": 0.3354632587859425, + "grad_norm": 0.5595733482793672, + "learning_rate": 1.904328018223235e-05, + "loss": 0.2935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29793277382850647, + "step": 210, + "valid_targets_mean": 4934.9, + "valid_targets_min": 1633 + }, + { + "epoch": 0.34345047923322686, + "grad_norm": 0.5589513052143709, + "learning_rate": 1.9498861047835993e-05, + "loss": 0.3022, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3029059171676636, + "step": 215, + "valid_targets_mean": 4674.2, + "valid_targets_min": 2145 + }, + { + "epoch": 0.3514376996805112, + "grad_norm": 0.5602768175734714, + "learning_rate": 1.9954441913439638e-05, + "loss": 0.2844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2719416916370392, + "step": 220, + "valid_targets_mean": 5259.6, + "valid_targets_min": 2728 + }, + { + "epoch": 0.35942492012779553, + "grad_norm": 0.5840002344805434, + "learning_rate": 2.0410022779043283e-05, + "loss": 0.2622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.252376914024353, + "step": 225, + "valid_targets_mean": 3708.0, + "valid_targets_min": 1563 + }, + { + "epoch": 0.36741214057507987, + "grad_norm": 0.4988357514360904, + "learning_rate": 2.0865603644646927e-05, + "loss": 0.2785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2672957479953766, + "step": 230, + "valid_targets_mean": 5539.1, + "valid_targets_min": 1948 + }, + { + "epoch": 0.3753993610223642, + "grad_norm": 0.5265055363647089, + "learning_rate": 2.1321184510250572e-05, + "loss": 0.2832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2662750482559204, + "step": 235, + "valid_targets_mean": 5089.4, + "valid_targets_min": 2441 + }, + { + "epoch": 0.38338658146964855, + "grad_norm": 0.6319243139561913, + "learning_rate": 2.1776765375854217e-05, + "loss": 0.2694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2903909683227539, + "step": 240, + "valid_targets_mean": 4345.3, + "valid_targets_min": 1426 + }, + { + "epoch": 0.3913738019169329, + "grad_norm": 0.5332922577584055, + "learning_rate": 2.223234624145786e-05, + "loss": 0.2692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2847251892089844, + "step": 245, + "valid_targets_mean": 5051.9, + "valid_targets_min": 601 + }, + { + "epoch": 0.3993610223642173, + "grad_norm": 0.530308481283379, + "learning_rate": 2.2687927107061506e-05, + "loss": 0.2896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2778535485267639, + "step": 250, + "valid_targets_mean": 4761.2, + "valid_targets_min": 1097 + }, + { + "epoch": 0.4073482428115016, + "grad_norm": 0.6098521445637076, + "learning_rate": 2.314350797266515e-05, + "loss": 0.297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27581244707107544, + "step": 255, + "valid_targets_mean": 3908.1, + "valid_targets_min": 1588 + }, + { + "epoch": 0.41533546325878595, + "grad_norm": 0.5921423858911565, + "learning_rate": 2.3599088838268792e-05, + "loss": 0.2734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2547699809074402, + "step": 260, + "valid_targets_mean": 5054.8, + "valid_targets_min": 358 + }, + { + "epoch": 0.4233226837060703, + "grad_norm": 0.5289923217272506, + "learning_rate": 2.4054669703872436e-05, + "loss": 0.2595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2866251468658447, + "step": 265, + "valid_targets_mean": 5726.3, + "valid_targets_min": 2125 + }, + { + "epoch": 0.43130990415335463, + "grad_norm": 0.5537781627686953, + "learning_rate": 2.4510250569476085e-05, + "loss": 0.3091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32708555459976196, + "step": 270, + "valid_targets_mean": 5520.9, + "valid_targets_min": 3173 + }, + { + "epoch": 0.43929712460063897, + "grad_norm": 0.5104465395111398, + "learning_rate": 2.496583143507973e-05, + "loss": 0.2773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3148839771747589, + "step": 275, + "valid_targets_mean": 5590.6, + "valid_targets_min": 2062 + }, + { + "epoch": 0.4472843450479233, + "grad_norm": 0.5551638885961803, + "learning_rate": 2.5421412300683374e-05, + "loss": 0.2862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28861215710639954, + "step": 280, + "valid_targets_mean": 4605.2, + "valid_targets_min": 2405 + }, + { + "epoch": 0.45527156549520764, + "grad_norm": 0.5774373008716863, + "learning_rate": 2.587699316628702e-05, + "loss": 0.2948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2806679904460907, + "step": 285, + "valid_targets_mean": 4729.7, + "valid_targets_min": 1175 + }, + { + "epoch": 0.46325878594249204, + "grad_norm": 0.5428102603769059, + "learning_rate": 2.6332574031890663e-05, + "loss": 0.2946, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32112547755241394, + "step": 290, + "valid_targets_mean": 5787.0, + "valid_targets_min": 1645 + }, + { + "epoch": 0.4712460063897764, + "grad_norm": 0.521358822134473, + "learning_rate": 2.6788154897494308e-05, + "loss": 0.2796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28433695435523987, + "step": 295, + "valid_targets_mean": 5907.2, + "valid_targets_min": 1100 + }, + { + "epoch": 0.4792332268370607, + "grad_norm": 0.5337346140514683, + "learning_rate": 2.7243735763097953e-05, + "loss": 0.2778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2725232243537903, + "step": 300, + "valid_targets_mean": 5427.4, + "valid_targets_min": 2111 + }, + { + "epoch": 0.48722044728434505, + "grad_norm": 0.5470089886286064, + "learning_rate": 2.7699316628701597e-05, + "loss": 0.2721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26878929138183594, + "step": 305, + "valid_targets_mean": 4654.0, + "valid_targets_min": 728 + }, + { + "epoch": 0.4952076677316294, + "grad_norm": 0.5190354642636552, + "learning_rate": 2.815489749430524e-05, + "loss": 0.2708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2548917233943939, + "step": 310, + "valid_targets_mean": 5067.2, + "valid_targets_min": 1822 + }, + { + "epoch": 0.5031948881789138, + "grad_norm": 0.5687012066316108, + "learning_rate": 2.8610478359908883e-05, + "loss": 0.2798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2866740822792053, + "step": 315, + "valid_targets_mean": 5067.9, + "valid_targets_min": 1552 + }, + { + "epoch": 0.5111821086261981, + "grad_norm": 0.5373160925653192, + "learning_rate": 2.906605922551253e-05, + "loss": 0.2718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27760303020477295, + "step": 320, + "valid_targets_mean": 5024.9, + "valid_targets_min": 1192 + }, + { + "epoch": 0.5191693290734825, + "grad_norm": 0.5362070010772059, + "learning_rate": 2.9521640091116176e-05, + "loss": 0.2727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28479108214378357, + "step": 325, + "valid_targets_mean": 5303.2, + "valid_targets_min": 2418 + }, + { + "epoch": 0.5271565495207667, + "grad_norm": 0.5848089779265616, + "learning_rate": 2.997722095671982e-05, + "loss": 0.2645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26311156153678894, + "step": 330, + "valid_targets_mean": 4052.6, + "valid_targets_min": 337 + }, + { + "epoch": 0.5351437699680511, + "grad_norm": 0.5493496138236031, + "learning_rate": 3.0432801822323465e-05, + "loss": 0.267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2734183967113495, + "step": 335, + "valid_targets_mean": 5034.6, + "valid_targets_min": 2061 + }, + { + "epoch": 0.5431309904153354, + "grad_norm": 0.5635667614887097, + "learning_rate": 3.088838268792711e-05, + "loss": 0.2864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3333064615726471, + "step": 340, + "valid_targets_mean": 5823.9, + "valid_targets_min": 333 + }, + { + "epoch": 0.5511182108626198, + "grad_norm": 0.5019872954405129, + "learning_rate": 3.1343963553530755e-05, + "loss": 0.2706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29065948724746704, + "step": 345, + "valid_targets_mean": 5628.3, + "valid_targets_min": 2412 + }, + { + "epoch": 0.5591054313099042, + "grad_norm": 0.5374160693839819, + "learning_rate": 3.17995444191344e-05, + "loss": 0.2604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25003743171691895, + "step": 350, + "valid_targets_mean": 4885.5, + "valid_targets_min": 1883 + }, + { + "epoch": 0.5670926517571885, + "grad_norm": 0.5230300161449793, + "learning_rate": 3.2255125284738044e-05, + "loss": 0.2593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3082168996334076, + "step": 355, + "valid_targets_mean": 5003.2, + "valid_targets_min": 919 + }, + { + "epoch": 0.5750798722044729, + "grad_norm": 0.5801770052473694, + "learning_rate": 3.271070615034169e-05, + "loss": 0.2514, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29632386565208435, + "step": 360, + "valid_targets_mean": 4708.2, + "valid_targets_min": 948 + }, + { + "epoch": 0.5830670926517572, + "grad_norm": 0.5472241673959384, + "learning_rate": 3.316628701594533e-05, + "loss": 0.286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25985270738601685, + "step": 365, + "valid_targets_mean": 4563.3, + "valid_targets_min": 997 + }, + { + "epoch": 0.5910543130990416, + "grad_norm": 0.5149939473439978, + "learning_rate": 3.362186788154898e-05, + "loss": 0.2661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27894413471221924, + "step": 370, + "valid_targets_mean": 5198.9, + "valid_targets_min": 720 + }, + { + "epoch": 0.5990415335463258, + "grad_norm": 0.580767045784304, + "learning_rate": 3.407744874715262e-05, + "loss": 0.2671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2662809491157532, + "step": 375, + "valid_targets_mean": 4736.9, + "valid_targets_min": 521 + }, + { + "epoch": 0.6070287539936102, + "grad_norm": 0.5837236073120515, + "learning_rate": 3.453302961275627e-05, + "loss": 0.2658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29299938678741455, + "step": 380, + "valid_targets_mean": 4359.5, + "valid_targets_min": 626 + }, + { + "epoch": 0.6150159744408946, + "grad_norm": 0.5683559146502843, + "learning_rate": 3.498861047835991e-05, + "loss": 0.2605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2716815173625946, + "step": 385, + "valid_targets_mean": 4500.7, + "valid_targets_min": 1009 + }, + { + "epoch": 0.6230031948881789, + "grad_norm": 0.5521731757690844, + "learning_rate": 3.5444191343963557e-05, + "loss": 0.2642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2861851453781128, + "step": 390, + "valid_targets_mean": 4939.9, + "valid_targets_min": 1717 + }, + { + "epoch": 0.6309904153354633, + "grad_norm": 0.5454459023619236, + "learning_rate": 3.58997722095672e-05, + "loss": 0.271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2945636212825775, + "step": 395, + "valid_targets_mean": 5303.6, + "valid_targets_min": 1049 + }, + { + "epoch": 0.6389776357827476, + "grad_norm": 0.537113572597406, + "learning_rate": 3.6355353075170846e-05, + "loss": 0.2666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2924715578556061, + "step": 400, + "valid_targets_mean": 5877.7, + "valid_targets_min": 1820 + }, + { + "epoch": 0.646964856230032, + "grad_norm": 0.5741616450872754, + "learning_rate": 3.681093394077449e-05, + "loss": 0.2591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3024194836616516, + "step": 405, + "valid_targets_mean": 4992.8, + "valid_targets_min": 807 + }, + { + "epoch": 0.6549520766773163, + "grad_norm": 0.5581797703618503, + "learning_rate": 3.7266514806378135e-05, + "loss": 0.2613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23057574033737183, + "step": 410, + "valid_targets_mean": 4829.6, + "valid_targets_min": 995 + }, + { + "epoch": 0.6629392971246006, + "grad_norm": 0.5027904434256412, + "learning_rate": 3.772209567198178e-05, + "loss": 0.2555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22512824833393097, + "step": 415, + "valid_targets_mean": 5073.9, + "valid_targets_min": 1198 + }, + { + "epoch": 0.670926517571885, + "grad_norm": 0.4590703964041238, + "learning_rate": 3.8177676537585425e-05, + "loss": 0.2651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28053027391433716, + "step": 420, + "valid_targets_mean": 5108.2, + "valid_targets_min": 974 + }, + { + "epoch": 0.6789137380191693, + "grad_norm": 0.5634136170884916, + "learning_rate": 3.863325740318907e-05, + "loss": 0.2611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2774794101715088, + "step": 425, + "valid_targets_mean": 5321.6, + "valid_targets_min": 1760 + }, + { + "epoch": 0.6869009584664537, + "grad_norm": 0.49462936802536345, + "learning_rate": 3.9088838268792714e-05, + "loss": 0.2594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23789596557617188, + "step": 430, + "valid_targets_mean": 4688.5, + "valid_targets_min": 1566 + }, + { + "epoch": 0.694888178913738, + "grad_norm": 0.5767236738768104, + "learning_rate": 3.954441913439636e-05, + "loss": 0.2549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23438569903373718, + "step": 435, + "valid_targets_mean": 5462.2, + "valid_targets_min": 988 + }, + { + "epoch": 0.7028753993610224, + "grad_norm": 0.5721728971305822, + "learning_rate": 4e-05, + "loss": 0.2583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2589830160140991, + "step": 440, + "valid_targets_mean": 4021.7, + "valid_targets_min": 680 + }, + { + "epoch": 0.7108626198083067, + "grad_norm": 0.640470205466098, + "learning_rate": 3.999984129682125e-05, + "loss": 0.2541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.263258695602417, + "step": 445, + "valid_targets_mean": 4029.4, + "valid_targets_min": 1150 + }, + { + "epoch": 0.7188498402555911, + "grad_norm": 0.56011564240242, + "learning_rate": 3.9999365189803684e-05, + "loss": 0.2561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23699337244033813, + "step": 450, + "valid_targets_mean": 4542.3, + "valid_targets_min": 614 + }, + { + "epoch": 0.7268370607028753, + "grad_norm": 0.5005609148594315, + "learning_rate": 3.9998571686503264e-05, + "loss": 0.2502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2354847490787506, + "step": 455, + "valid_targets_mean": 5020.0, + "valid_targets_min": 395 + }, + { + "epoch": 0.7348242811501597, + "grad_norm": 0.46714059891659654, + "learning_rate": 3.9997460799513134e-05, + "loss": 0.2479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2476460188627243, + "step": 460, + "valid_targets_mean": 6124.8, + "valid_targets_min": 1554 + }, + { + "epoch": 0.7428115015974441, + "grad_norm": 0.4917282383555329, + "learning_rate": 3.999603254646343e-05, + "loss": 0.2575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23978930711746216, + "step": 465, + "valid_targets_mean": 3983.5, + "valid_targets_min": 1464 + }, + { + "epoch": 0.7507987220447284, + "grad_norm": 0.5176375158379338, + "learning_rate": 3.9994286950020986e-05, + "loss": 0.272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29831236600875854, + "step": 470, + "valid_targets_mean": 5806.6, + "valid_targets_min": 2786 + }, + { + "epoch": 0.7587859424920128, + "grad_norm": 0.5702966288748866, + "learning_rate": 3.999222403788896e-05, + "loss": 0.2716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2796366214752197, + "step": 475, + "valid_targets_mean": 3448.6, + "valid_targets_min": 817 + }, + { + "epoch": 0.7667731629392971, + "grad_norm": 0.4757010714725201, + "learning_rate": 3.9989843842806435e-05, + "loss": 0.2547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28887617588043213, + "step": 480, + "valid_targets_mean": 5631.5, + "valid_targets_min": 968 + }, + { + "epoch": 0.7747603833865815, + "grad_norm": 0.4685350745552442, + "learning_rate": 3.998714640254786e-05, + "loss": 0.266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2430667281150818, + "step": 485, + "valid_targets_mean": 5380.9, + "valid_targets_min": 2037 + }, + { + "epoch": 0.7827476038338658, + "grad_norm": 0.48364161750691215, + "learning_rate": 3.998413175992247e-05, + "loss": 0.2625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2936822175979614, + "step": 490, + "valid_targets_mean": 5195.9, + "valid_targets_min": 1019 + }, + { + "epoch": 0.7907348242811502, + "grad_norm": 0.5358890980558945, + "learning_rate": 3.99807999627736e-05, + "loss": 0.2714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2451843023300171, + "step": 495, + "valid_targets_mean": 4305.3, + "valid_targets_min": 1344 + }, + { + "epoch": 0.7987220447284346, + "grad_norm": 0.49266507882701177, + "learning_rate": 3.997715106397794e-05, + "loss": 0.2602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21446114778518677, + "step": 500, + "valid_targets_mean": 4101.4, + "valid_targets_min": 553 + }, + { + "epoch": 0.8067092651757188, + "grad_norm": 0.5283191167485831, + "learning_rate": 3.997318512144465e-05, + "loss": 0.261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27231770753860474, + "step": 505, + "valid_targets_mean": 4392.6, + "valid_targets_min": 617 + }, + { + "epoch": 0.8146964856230032, + "grad_norm": 0.4637854327346454, + "learning_rate": 3.9968902198114516e-05, + "loss": 0.2513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23250697553157806, + "step": 510, + "valid_targets_mean": 5592.4, + "valid_targets_min": 1339 + }, + { + "epoch": 0.8226837060702875, + "grad_norm": 0.512928763182033, + "learning_rate": 3.996430236195889e-05, + "loss": 0.2575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2848711311817169, + "step": 515, + "valid_targets_mean": 5160.4, + "valid_targets_min": 1245 + }, + { + "epoch": 0.8306709265175719, + "grad_norm": 0.6688205624295005, + "learning_rate": 3.995938568597864e-05, + "loss": 0.2594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28169065713882446, + "step": 520, + "valid_targets_mean": 4845.2, + "valid_targets_min": 1200 + }, + { + "epoch": 0.8386581469648562, + "grad_norm": 0.6530982098422367, + "learning_rate": 3.995415224820297e-05, + "loss": 0.252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2486608326435089, + "step": 525, + "valid_targets_mean": 4801.1, + "valid_targets_min": 1232 + }, + { + "epoch": 0.8466453674121406, + "grad_norm": 0.654809754081559, + "learning_rate": 3.994860213168819e-05, + "loss": 0.2574, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23041307926177979, + "step": 530, + "valid_targets_mean": 4810.8, + "valid_targets_min": 1533 + }, + { + "epoch": 0.854632587859425, + "grad_norm": 0.7831567102686312, + "learning_rate": 3.9942735424516435e-05, + "loss": 0.2829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21508295834064484, + "step": 535, + "valid_targets_mean": 3856.6, + "valid_targets_min": 697 + }, + { + "epoch": 0.8626198083067093, + "grad_norm": 0.4712439919508373, + "learning_rate": 3.9936552219794196e-05, + "loss": 0.2523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2780511677265167, + "step": 540, + "valid_targets_mean": 5232.4, + "valid_targets_min": 1528 + }, + { + "epoch": 0.8706070287539937, + "grad_norm": 0.4492945585686436, + "learning_rate": 3.993005261565091e-05, + "loss": 0.2701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25370901823043823, + "step": 545, + "valid_targets_mean": 5525.1, + "valid_targets_min": 785 + }, + { + "epoch": 0.8785942492012779, + "grad_norm": 0.5040607967651063, + "learning_rate": 3.992323671523735e-05, + "loss": 0.2537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24419763684272766, + "step": 550, + "valid_targets_mean": 4753.2, + "valid_targets_min": 2476 + }, + { + "epoch": 0.8865814696485623, + "grad_norm": 0.4571433610607522, + "learning_rate": 3.991610462672403e-05, + "loss": 0.2576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22853198647499084, + "step": 555, + "valid_targets_mean": 5481.8, + "valid_targets_min": 1745 + }, + { + "epoch": 0.8945686900958466, + "grad_norm": 0.4811065332797884, + "learning_rate": 3.9908656463299456e-05, + "loss": 0.2573, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2230932116508484, + "step": 560, + "valid_targets_mean": 4151.3, + "valid_targets_min": 2275 + }, + { + "epoch": 0.902555910543131, + "grad_norm": 0.4478004354255293, + "learning_rate": 3.990089234316835e-05, + "loss": 0.2281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2097976952791214, + "step": 565, + "valid_targets_mean": 4595.0, + "valid_targets_min": 846 + }, + { + "epoch": 0.9105431309904153, + "grad_norm": 0.4939456900920226, + "learning_rate": 3.989281238954978e-05, + "loss": 0.2513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23206022381782532, + "step": 570, + "valid_targets_mean": 4879.6, + "valid_targets_min": 441 + }, + { + "epoch": 0.9185303514376997, + "grad_norm": 0.5749964628221267, + "learning_rate": 3.9884416730675155e-05, + "loss": 0.253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23972266912460327, + "step": 575, + "valid_targets_mean": 4225.3, + "valid_targets_min": 1024 + }, + { + "epoch": 0.9265175718849841, + "grad_norm": 0.5589879109135186, + "learning_rate": 3.987570549978626e-05, + "loss": 0.2361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2693712115287781, + "step": 580, + "valid_targets_mean": 4750.5, + "valid_targets_min": 1402 + }, + { + "epoch": 0.9345047923322684, + "grad_norm": 0.495003839937131, + "learning_rate": 3.986667883513311e-05, + "loss": 0.2489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23675037920475006, + "step": 585, + "valid_targets_mean": 4722.5, + "valid_targets_min": 1095 + }, + { + "epoch": 0.9424920127795527, + "grad_norm": 0.4958347061039251, + "learning_rate": 3.985733687997173e-05, + "loss": 0.2548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27246877551078796, + "step": 590, + "valid_targets_mean": 4859.9, + "valid_targets_min": 1067 + }, + { + "epoch": 0.950479233226837, + "grad_norm": 0.5214337152337335, + "learning_rate": 3.984767978256192e-05, + "loss": 0.2522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2561061680316925, + "step": 595, + "valid_targets_mean": 4810.3, + "valid_targets_min": 962 + }, + { + "epoch": 0.9584664536741214, + "grad_norm": 0.599666677230963, + "learning_rate": 3.983770769616488e-05, + "loss": 0.2532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24689209461212158, + "step": 600, + "valid_targets_mean": 4667.0, + "valid_targets_min": 611 + }, + { + "epoch": 0.9664536741214057, + "grad_norm": 0.42495129537443144, + "learning_rate": 3.9827420779040805e-05, + "loss": 0.2632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22384385764598846, + "step": 605, + "valid_targets_mean": 5402.8, + "valid_targets_min": 1674 + }, + { + "epoch": 0.9744408945686901, + "grad_norm": 0.5919379724029944, + "learning_rate": 3.981681919444633e-05, + "loss": 0.2492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21028448641300201, + "step": 610, + "valid_targets_mean": 4113.6, + "valid_targets_min": 1029 + }, + { + "epoch": 0.9824281150159745, + "grad_norm": 0.5057594048145762, + "learning_rate": 3.980590311063197e-05, + "loss": 0.2482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.263153612613678, + "step": 615, + "valid_targets_mean": 4787.4, + "valid_targets_min": 332 + }, + { + "epoch": 0.9904153354632588, + "grad_norm": 0.5473428636517159, + "learning_rate": 3.9794672700839455e-05, + "loss": 0.2485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25911781191825867, + "step": 620, + "valid_targets_mean": 5511.7, + "valid_targets_min": 1610 + }, + { + "epoch": 0.9984025559105432, + "grad_norm": 0.6548522189800827, + "learning_rate": 3.9783128143298945e-05, + "loss": 0.2445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23794244229793549, + "step": 625, + "valid_targets_mean": 3679.9, + "valid_targets_min": 1378 + }, + { + "epoch": 1.0063897763578276, + "grad_norm": 0.4637817317582565, + "learning_rate": 3.977126962122625e-05, + "loss": 0.2547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24145828187465668, + "step": 630, + "valid_targets_mean": 4904.1, + "valid_targets_min": 1583 + }, + { + "epoch": 1.0143769968051117, + "grad_norm": 0.5360031330976764, + "learning_rate": 3.975909732281988e-05, + "loss": 0.2381, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2447453737258911, + "step": 635, + "valid_targets_mean": 4470.6, + "valid_targets_min": 1193 + }, + { + "epoch": 1.0223642172523961, + "grad_norm": 0.4599528851816257, + "learning_rate": 3.974661144125808e-05, + "loss": 0.2356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23513412475585938, + "step": 640, + "valid_targets_mean": 5180.8, + "valid_targets_min": 1535 + }, + { + "epoch": 1.0303514376996805, + "grad_norm": 0.5028441117758059, + "learning_rate": 3.973381217469576e-05, + "loss": 0.2334, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23187929391860962, + "step": 645, + "valid_targets_mean": 5562.6, + "valid_targets_min": 835 + }, + { + "epoch": 1.038338658146965, + "grad_norm": 0.46649378993081314, + "learning_rate": 3.972069972626135e-05, + "loss": 0.2307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2249964475631714, + "step": 650, + "valid_targets_mean": 5799.1, + "valid_targets_min": 1617 + }, + { + "epoch": 1.0463258785942493, + "grad_norm": 0.4890492009118198, + "learning_rate": 3.970727430405357e-05, + "loss": 0.2245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2165513038635254, + "step": 655, + "valid_targets_mean": 4514.2, + "valid_targets_min": 777 + }, + { + "epoch": 1.0543130990415335, + "grad_norm": 0.5326777464844199, + "learning_rate": 3.969353612113815e-05, + "loss": 0.2363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23171201348304749, + "step": 660, + "valid_targets_mean": 4665.6, + "valid_targets_min": 835 + }, + { + "epoch": 1.0623003194888179, + "grad_norm": 0.4418803882160922, + "learning_rate": 3.96794853955444e-05, + "loss": 0.2177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19331520795822144, + "step": 665, + "valid_targets_mean": 5251.9, + "valid_targets_min": 1005 + }, + { + "epoch": 1.0702875399361023, + "grad_norm": 0.45451533824288753, + "learning_rate": 3.966512235026182e-05, + "loss": 0.2309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2011176496744156, + "step": 670, + "valid_targets_mean": 5191.7, + "valid_targets_min": 723 + }, + { + "epoch": 1.0782747603833867, + "grad_norm": 0.5484392391440085, + "learning_rate": 3.96504472132365e-05, + "loss": 0.2251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2403126060962677, + "step": 675, + "valid_targets_mean": 4600.9, + "valid_targets_min": 940 + }, + { + "epoch": 1.0862619808306708, + "grad_norm": 0.4836244631836265, + "learning_rate": 3.9635460217367513e-05, + "loss": 0.2347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2545502781867981, + "step": 680, + "valid_targets_mean": 5117.4, + "valid_targets_min": 1430 + }, + { + "epoch": 1.0942492012779552, + "grad_norm": 0.5347142043189543, + "learning_rate": 3.962016160050327e-05, + "loss": 0.2411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24589413404464722, + "step": 685, + "valid_targets_mean": 4828.7, + "valid_targets_min": 1024 + }, + { + "epoch": 1.1022364217252396, + "grad_norm": 0.4826007302397816, + "learning_rate": 3.960455160543767e-05, + "loss": 0.2264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2529512345790863, + "step": 690, + "valid_targets_mean": 5452.4, + "valid_targets_min": 406 + }, + { + "epoch": 1.110223642172524, + "grad_norm": 0.6187571170123001, + "learning_rate": 3.958863047990631e-05, + "loss": 0.2408, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27497977018356323, + "step": 695, + "valid_targets_mean": 4824.9, + "valid_targets_min": 590 + }, + { + "epoch": 1.1182108626198084, + "grad_norm": 0.5218472166818443, + "learning_rate": 3.95723984765825e-05, + "loss": 0.2384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2636116147041321, + "step": 700, + "valid_targets_mean": 5057.1, + "valid_targets_min": 625 + }, + { + "epoch": 1.1261980830670926, + "grad_norm": 0.5566646689747374, + "learning_rate": 3.955585585307329e-05, + "loss": 0.2246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23090273141860962, + "step": 705, + "valid_targets_mean": 5316.9, + "valid_targets_min": 1154 + }, + { + "epoch": 1.134185303514377, + "grad_norm": 0.5316587953258529, + "learning_rate": 3.9539002871915395e-05, + "loss": 0.2286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2314445823431015, + "step": 710, + "valid_targets_mean": 4499.5, + "valid_targets_min": 850 + }, + { + "epoch": 1.1421725239616614, + "grad_norm": 0.47010947677754694, + "learning_rate": 3.952183980057096e-05, + "loss": 0.2133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18813036382198334, + "step": 715, + "valid_targets_mean": 4771.4, + "valid_targets_min": 2418 + }, + { + "epoch": 1.1501597444089458, + "grad_norm": 0.5355744283119177, + "learning_rate": 3.950436691142339e-05, + "loss": 0.2316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23436841368675232, + "step": 720, + "valid_targets_mean": 5125.1, + "valid_targets_min": 1756 + }, + { + "epoch": 1.15814696485623, + "grad_norm": 0.45835728873246123, + "learning_rate": 3.948658448177299e-05, + "loss": 0.2291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24731190502643585, + "step": 725, + "valid_targets_mean": 5959.6, + "valid_targets_min": 566 + }, + { + "epoch": 1.1661341853035143, + "grad_norm": 0.453683250662003, + "learning_rate": 3.946849279383258e-05, + "loss": 0.2519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23882758617401123, + "step": 730, + "valid_targets_mean": 5065.3, + "valid_targets_min": 1942 + }, + { + "epoch": 1.1741214057507987, + "grad_norm": 0.5202455361914546, + "learning_rate": 3.9450092134722984e-05, + "loss": 0.2393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24885712563991547, + "step": 735, + "valid_targets_mean": 5193.4, + "valid_targets_min": 1693 + }, + { + "epoch": 1.182108626198083, + "grad_norm": 0.5590629911303716, + "learning_rate": 3.943138279646853e-05, + "loss": 0.2392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2640933394432068, + "step": 740, + "valid_targets_mean": 4765.4, + "valid_targets_min": 2236 + }, + { + "epoch": 1.1900958466453675, + "grad_norm": 0.4568215659004477, + "learning_rate": 3.941236507599234e-05, + "loss": 0.2538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23051276803016663, + "step": 745, + "valid_targets_mean": 5046.9, + "valid_targets_min": 1931 + }, + { + "epoch": 1.1980830670926517, + "grad_norm": 0.44712112441589846, + "learning_rate": 3.93930392751117e-05, + "loss": 0.2225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21604947745800018, + "step": 750, + "valid_targets_mean": 5026.4, + "valid_targets_min": 1153 + }, + { + "epoch": 1.206070287539936, + "grad_norm": 0.5591497129271406, + "learning_rate": 3.9373405700533204e-05, + "loss": 0.221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21242642402648926, + "step": 755, + "valid_targets_mean": 4334.6, + "valid_targets_min": 1623 + }, + { + "epoch": 1.2140575079872205, + "grad_norm": 0.5943967241943648, + "learning_rate": 3.935346466384793e-05, + "loss": 0.2416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29953110218048096, + "step": 760, + "valid_targets_mean": 5227.2, + "valid_targets_min": 920 + }, + { + "epoch": 1.2220447284345048, + "grad_norm": 0.4042075117948494, + "learning_rate": 3.933321648152646e-05, + "loss": 0.246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22984498739242554, + "step": 765, + "valid_targets_mean": 6071.8, + "valid_targets_min": 2067 + }, + { + "epoch": 1.230031948881789, + "grad_norm": 0.48485906424603825, + "learning_rate": 3.931266147491389e-05, + "loss": 0.2363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23701247572898865, + "step": 770, + "valid_targets_mean": 4539.6, + "valid_targets_min": 912 + }, + { + "epoch": 1.2380191693290734, + "grad_norm": 0.484609973987031, + "learning_rate": 3.929179997022471e-05, + "loss": 0.2355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22221249341964722, + "step": 775, + "valid_targets_mean": 4264.4, + "valid_targets_min": 2231 + }, + { + "epoch": 1.2460063897763578, + "grad_norm": 0.4634913749349423, + "learning_rate": 3.927063229853763e-05, + "loss": 0.2505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2315971553325653, + "step": 780, + "valid_targets_mean": 4269.0, + "valid_targets_min": 1298 + }, + { + "epoch": 1.2539936102236422, + "grad_norm": 0.46003637169332934, + "learning_rate": 3.9249158795790316e-05, + "loss": 0.233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21694815158843994, + "step": 785, + "valid_targets_mean": 4534.9, + "valid_targets_min": 1367 + }, + { + "epoch": 1.2619808306709266, + "grad_norm": 0.5159552397005117, + "learning_rate": 3.9227379802774106e-05, + "loss": 0.2468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22513447701931, + "step": 790, + "valid_targets_mean": 4702.6, + "valid_targets_min": 933 + }, + { + "epoch": 1.269968051118211, + "grad_norm": 1.1512499859539236, + "learning_rate": 3.920529566512852e-05, + "loss": 0.2435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23088482022285461, + "step": 795, + "valid_targets_mean": 5759.7, + "valid_targets_min": 2373 + }, + { + "epoch": 1.2779552715654952, + "grad_norm": 0.5434796049479874, + "learning_rate": 3.918290673333585e-05, + "loss": 0.2382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2833731472492218, + "step": 800, + "valid_targets_mean": 4076.2, + "valid_targets_min": 568 + }, + { + "epoch": 1.2859424920127795, + "grad_norm": 0.5180856784834859, + "learning_rate": 3.916021336271556e-05, + "loss": 0.2491, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23785400390625, + "step": 805, + "valid_targets_mean": 4955.7, + "valid_targets_min": 924 + }, + { + "epoch": 1.293929712460064, + "grad_norm": 0.5149958361755231, + "learning_rate": 3.913721591341867e-05, + "loss": 0.2219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25627684593200684, + "step": 810, + "valid_targets_mean": 5241.1, + "valid_targets_min": 1588 + }, + { + "epoch": 1.3019169329073481, + "grad_norm": 0.4502540860682754, + "learning_rate": 3.9113914750421985e-05, + "loss": 0.219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25584468245506287, + "step": 815, + "valid_targets_mean": 6717.8, + "valid_targets_min": 1683 + }, + { + "epoch": 1.3099041533546325, + "grad_norm": 0.5006857836953049, + "learning_rate": 3.9090310243522394e-05, + "loss": 0.2337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2447993904352188, + "step": 820, + "valid_targets_mean": 4522.4, + "valid_targets_min": 557 + }, + { + "epoch": 1.317891373801917, + "grad_norm": 0.530082191187644, + "learning_rate": 3.90664027673309e-05, + "loss": 0.2421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28234124183654785, + "step": 825, + "valid_targets_mean": 5410.7, + "valid_targets_min": 2137 + }, + { + "epoch": 1.3258785942492013, + "grad_norm": 0.4414104140990789, + "learning_rate": 3.904219270126677e-05, + "loss": 0.2199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20832520723342896, + "step": 830, + "valid_targets_mean": 4990.1, + "valid_targets_min": 1554 + }, + { + "epoch": 1.3338658146964857, + "grad_norm": 1.2054942942284992, + "learning_rate": 3.901768042955144e-05, + "loss": 0.2331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2427096664905548, + "step": 835, + "valid_targets_mean": 5274.6, + "valid_targets_min": 2181 + }, + { + "epoch": 1.34185303514377, + "grad_norm": 0.44040986559266476, + "learning_rate": 3.8992866341202446e-05, + "loss": 0.2206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23897823691368103, + "step": 840, + "valid_targets_mean": 6298.9, + "valid_targets_min": 320 + }, + { + "epoch": 1.3498402555910542, + "grad_norm": 0.42791841484545096, + "learning_rate": 3.8967750830027277e-05, + "loss": 0.2271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21991094946861267, + "step": 845, + "valid_targets_mean": 4994.8, + "valid_targets_min": 1678 + }, + { + "epoch": 1.3578274760383386, + "grad_norm": 0.4765755408468607, + "learning_rate": 3.894233429461706e-05, + "loss": 0.2293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24188624322414398, + "step": 850, + "valid_targets_mean": 4151.7, + "valid_targets_min": 640 + }, + { + "epoch": 1.365814696485623, + "grad_norm": 0.4702467492894455, + "learning_rate": 3.89166171383403e-05, + "loss": 0.2607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2250172346830368, + "step": 855, + "valid_targets_mean": 5215.4, + "valid_targets_min": 1781 + }, + { + "epoch": 1.3738019169329074, + "grad_norm": 0.5280309031579677, + "learning_rate": 3.889059976933644e-05, + "loss": 0.2176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22099585831165314, + "step": 860, + "valid_targets_mean": 5249.2, + "valid_targets_min": 1828 + }, + { + "epoch": 1.3817891373801916, + "grad_norm": 0.5191741974294681, + "learning_rate": 3.88642826005094e-05, + "loss": 0.2209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23172377049922943, + "step": 865, + "valid_targets_mean": 4564.9, + "valid_targets_min": 299 + }, + { + "epoch": 1.389776357827476, + "grad_norm": 0.4931770156824684, + "learning_rate": 3.883766604952102e-05, + "loss": 0.2405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22133614122867584, + "step": 870, + "valid_targets_mean": 4580.8, + "valid_targets_min": 1788 + }, + { + "epoch": 1.3977635782747604, + "grad_norm": 0.43973258214989486, + "learning_rate": 3.8810750538784404e-05, + "loss": 0.2397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2410700023174286, + "step": 875, + "valid_targets_mean": 5454.4, + "valid_targets_min": 268 + }, + { + "epoch": 1.4057507987220448, + "grad_norm": 0.46352392220990385, + "learning_rate": 3.878353649545728e-05, + "loss": 0.2305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2691129446029663, + "step": 880, + "valid_targets_mean": 4870.0, + "valid_targets_min": 1357 + }, + { + "epoch": 1.4137380191693292, + "grad_norm": 0.45638607377842, + "learning_rate": 3.875602435143517e-05, + "loss": 0.2413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2274504452943802, + "step": 885, + "valid_targets_mean": 5388.2, + "valid_targets_min": 1962 + }, + { + "epoch": 1.4217252396166133, + "grad_norm": 0.4790466979102037, + "learning_rate": 3.872821454334453e-05, + "loss": 0.2224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2431117296218872, + "step": 890, + "valid_targets_mean": 5412.2, + "valid_targets_min": 1351 + }, + { + "epoch": 1.4297124600638977, + "grad_norm": 0.4117592654298577, + "learning_rate": 3.870010751253587e-05, + "loss": 0.2244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2094022035598755, + "step": 895, + "valid_targets_mean": 5338.6, + "valid_targets_min": 337 + }, + { + "epoch": 1.4376996805111821, + "grad_norm": 0.5463838416850257, + "learning_rate": 3.867170370507668e-05, + "loss": 0.2105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21328377723693848, + "step": 900, + "valid_targets_mean": 3389.1, + "valid_targets_min": 846 + }, + { + "epoch": 1.4456869009584665, + "grad_norm": 0.4394986594361576, + "learning_rate": 3.8643003571744445e-05, + "loss": 0.2202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21873891353607178, + "step": 905, + "valid_targets_mean": 5016.2, + "valid_targets_min": 1875 + }, + { + "epoch": 1.4536741214057507, + "grad_norm": 0.4092707030943055, + "learning_rate": 3.861400756801938e-05, + "loss": 0.2216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21018297970294952, + "step": 910, + "valid_targets_mean": 5767.4, + "valid_targets_min": 2816 + }, + { + "epoch": 1.461661341853035, + "grad_norm": 0.465636175172399, + "learning_rate": 3.8584716154077286e-05, + "loss": 0.2195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22354570031166077, + "step": 915, + "valid_targets_mean": 6138.7, + "valid_targets_min": 2160 + }, + { + "epoch": 1.4696485623003195, + "grad_norm": 0.434468625549387, + "learning_rate": 3.855512979478222e-05, + "loss": 0.2457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22415438294410706, + "step": 920, + "valid_targets_mean": 6146.2, + "valid_targets_min": 609 + }, + { + "epoch": 1.4776357827476039, + "grad_norm": 0.44757829016572626, + "learning_rate": 3.852524895967911e-05, + "loss": 0.2387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23147371411323547, + "step": 925, + "valid_targets_mean": 5348.4, + "valid_targets_min": 2054 + }, + { + "epoch": 1.4856230031948883, + "grad_norm": 0.5097635877620077, + "learning_rate": 3.8495074122986296e-05, + "loss": 0.2309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27335506677627563, + "step": 930, + "valid_targets_mean": 5118.7, + "valid_targets_min": 1367 + }, + { + "epoch": 1.4936102236421724, + "grad_norm": 0.4776053358725652, + "learning_rate": 3.846460576358804e-05, + "loss": 0.224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22878849506378174, + "step": 935, + "valid_targets_mean": 4780.4, + "valid_targets_min": 1610 + }, + { + "epoch": 1.5015974440894568, + "grad_norm": 0.5356334827731245, + "learning_rate": 3.843384436502688e-05, + "loss": 0.2424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22441717982292175, + "step": 940, + "valid_targets_mean": 4326.4, + "valid_targets_min": 476 + }, + { + "epoch": 1.5095846645367412, + "grad_norm": 0.4512491618502969, + "learning_rate": 3.8402790415496e-05, + "loss": 0.2285, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21335360407829285, + "step": 945, + "valid_targets_mean": 5230.7, + "valid_targets_min": 617 + }, + { + "epoch": 1.5175718849840254, + "grad_norm": 0.45906002413677505, + "learning_rate": 3.837144440783144e-05, + "loss": 0.2332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24478338658809662, + "step": 950, + "valid_targets_mean": 5632.8, + "valid_targets_min": 1674 + }, + { + "epoch": 1.5255591054313098, + "grad_norm": 0.49019467193719773, + "learning_rate": 3.833980683950431e-05, + "loss": 0.2411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2519046664237976, + "step": 955, + "valid_targets_mean": 5137.8, + "valid_targets_min": 932 + }, + { + "epoch": 1.5335463258785942, + "grad_norm": 0.42945027027443916, + "learning_rate": 3.8307878212612886e-05, + "loss": 0.2311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1915363073348999, + "step": 960, + "valid_targets_mean": 4416.3, + "valid_targets_min": 761 + }, + { + "epoch": 1.5415335463258786, + "grad_norm": 0.5579055331895597, + "learning_rate": 3.827565903387461e-05, + "loss": 0.2176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21044248342514038, + "step": 965, + "valid_targets_mean": 5284.6, + "valid_targets_min": 2506 + }, + { + "epoch": 1.549520766773163, + "grad_norm": 0.5356246975818258, + "learning_rate": 3.82431498146181e-05, + "loss": 0.2525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2801210284233093, + "step": 970, + "valid_targets_mean": 5111.0, + "valid_targets_min": 648 + }, + { + "epoch": 1.5575079872204474, + "grad_norm": 0.4676912946078301, + "learning_rate": 3.821035107077499e-05, + "loss": 0.2415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2880561053752899, + "step": 975, + "valid_targets_mean": 5908.9, + "valid_targets_min": 729 + }, + { + "epoch": 1.5654952076677318, + "grad_norm": 0.418525383451211, + "learning_rate": 3.817726332287179e-05, + "loss": 0.2233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21174396574497223, + "step": 980, + "valid_targets_mean": 5006.8, + "valid_targets_min": 817 + }, + { + "epoch": 1.573482428115016, + "grad_norm": 0.5929561302080186, + "learning_rate": 3.814388709602155e-05, + "loss": 0.2348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2387070208787918, + "step": 985, + "valid_targets_mean": 4222.9, + "valid_targets_min": 1167 + }, + { + "epoch": 1.5814696485623003, + "grad_norm": 0.4595638312331319, + "learning_rate": 3.8110222919915626e-05, + "loss": 0.2233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23978567123413086, + "step": 990, + "valid_targets_mean": 5538.3, + "valid_targets_min": 1108 + }, + { + "epoch": 1.5894568690095847, + "grad_norm": 0.5512689817880697, + "learning_rate": 3.8076271328815175e-05, + "loss": 0.2346, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23280058801174164, + "step": 995, + "valid_targets_mean": 4920.5, + "valid_targets_min": 724 + }, + { + "epoch": 1.5974440894568689, + "grad_norm": 0.42805027733164774, + "learning_rate": 3.804203286154275e-05, + "loss": 0.2214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21907193958759308, + "step": 1000, + "valid_targets_mean": 5069.7, + "valid_targets_min": 1920 + }, + { + "epoch": 1.6054313099041533, + "grad_norm": 0.4858809130152022, + "learning_rate": 3.800750806147371e-05, + "loss": 0.2374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22444787621498108, + "step": 1005, + "valid_targets_mean": 4515.1, + "valid_targets_min": 714 + }, + { + "epoch": 1.6134185303514377, + "grad_norm": 0.5302128778041387, + "learning_rate": 3.79726974765276e-05, + "loss": 0.2356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25729191303253174, + "step": 1010, + "valid_targets_mean": 4565.3, + "valid_targets_min": 1548 + }, + { + "epoch": 1.621405750798722, + "grad_norm": 0.4829977405730259, + "learning_rate": 3.793760165915947e-05, + "loss": 0.2341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23642590641975403, + "step": 1015, + "valid_targets_mean": 4304.2, + "valid_targets_min": 478 + }, + { + "epoch": 1.6293929712460065, + "grad_norm": 0.4594198791814113, + "learning_rate": 3.7902221166351106e-05, + "loss": 0.2234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20123255252838135, + "step": 1020, + "valid_targets_mean": 5075.9, + "valid_targets_min": 1258 + }, + { + "epoch": 1.6373801916932909, + "grad_norm": 0.4426802622554693, + "learning_rate": 3.786655655960216e-05, + "loss": 0.2255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23082798719406128, + "step": 1025, + "valid_targets_mean": 5212.6, + "valid_targets_min": 1279 + }, + { + "epoch": 1.645367412140575, + "grad_norm": 0.4870481188557817, + "learning_rate": 3.7830608404921294e-05, + "loss": 0.2074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21210849285125732, + "step": 1030, + "valid_targets_mean": 4509.0, + "valid_targets_min": 1346 + }, + { + "epoch": 1.6533546325878594, + "grad_norm": 0.4698781770865886, + "learning_rate": 3.7794377272817144e-05, + "loss": 0.2173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2248748391866684, + "step": 1035, + "valid_targets_mean": 5013.8, + "valid_targets_min": 829 + }, + { + "epoch": 1.6613418530351438, + "grad_norm": 0.4361307082713528, + "learning_rate": 3.775786373828929e-05, + "loss": 0.2286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.222589910030365, + "step": 1040, + "valid_targets_mean": 5144.6, + "valid_targets_min": 770 + }, + { + "epoch": 1.669329073482428, + "grad_norm": 0.49522338344118094, + "learning_rate": 3.7721068380819135e-05, + "loss": 0.2193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2181740403175354, + "step": 1045, + "valid_targets_mean": 4062.8, + "valid_targets_min": 860 + }, + { + "epoch": 1.6773162939297124, + "grad_norm": 0.49702593993884003, + "learning_rate": 3.768399178436069e-05, + "loss": 0.2349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23673710227012634, + "step": 1050, + "valid_targets_mean": 4153.0, + "valid_targets_min": 1755 + }, + { + "epoch": 1.6853035143769968, + "grad_norm": 0.455513257026603, + "learning_rate": 3.764663453733135e-05, + "loss": 0.2378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2705381512641907, + "step": 1055, + "valid_targets_mean": 5011.4, + "valid_targets_min": 1194 + }, + { + "epoch": 1.6932907348242812, + "grad_norm": 0.48342636718296034, + "learning_rate": 3.7608997232602475e-05, + "loss": 0.2342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21209147572517395, + "step": 1060, + "valid_targets_mean": 4200.3, + "valid_targets_min": 1032 + }, + { + "epoch": 1.7012779552715656, + "grad_norm": 0.4680188702163582, + "learning_rate": 3.757108046749006e-05, + "loss": 0.2299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26718616485595703, + "step": 1065, + "valid_targets_mean": 4864.6, + "valid_targets_min": 710 + }, + { + "epoch": 1.70926517571885, + "grad_norm": 0.46173075527590035, + "learning_rate": 3.753288484374524e-05, + "loss": 0.2297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2084069550037384, + "step": 1070, + "valid_targets_mean": 4525.0, + "valid_targets_min": 1873 + }, + { + "epoch": 1.7172523961661343, + "grad_norm": 0.45688747124270174, + "learning_rate": 3.7494410967544674e-05, + "loss": 0.2318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22160950303077698, + "step": 1075, + "valid_targets_mean": 5390.1, + "valid_targets_min": 3406 + }, + { + "epoch": 1.7252396166134185, + "grad_norm": 0.5994722237990151, + "learning_rate": 3.745565944948103e-05, + "loss": 0.2156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21359854936599731, + "step": 1080, + "valid_targets_mean": 4494.8, + "valid_targets_min": 1172 + }, + { + "epoch": 1.733226837060703, + "grad_norm": 0.42158057178019015, + "learning_rate": 3.7416630904553205e-05, + "loss": 0.2287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2346889078617096, + "step": 1085, + "valid_targets_mean": 5971.8, + "valid_targets_min": 2006 + }, + { + "epoch": 1.741214057507987, + "grad_norm": 0.41370122622856514, + "learning_rate": 3.737732595215663e-05, + "loss": 0.2244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2176918089389801, + "step": 1090, + "valid_targets_mean": 5168.2, + "valid_targets_min": 1622 + }, + { + "epoch": 1.7492012779552715, + "grad_norm": 0.42953107682395736, + "learning_rate": 3.733774521607338e-05, + "loss": 0.2282, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23724493384361267, + "step": 1095, + "valid_targets_mean": 5557.6, + "valid_targets_min": 1198 + }, + { + "epoch": 1.7571884984025559, + "grad_norm": 0.5657788201117734, + "learning_rate": 3.729788932446231e-05, + "loss": 0.2239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25702691078186035, + "step": 1100, + "valid_targets_mean": 4413.9, + "valid_targets_min": 794 + }, + { + "epoch": 1.7651757188498403, + "grad_norm": 0.417477496167328, + "learning_rate": 3.7257758909849107e-05, + "loss": 0.2238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1908511519432068, + "step": 1105, + "valid_targets_mean": 4390.8, + "valid_targets_min": 765 + }, + { + "epoch": 1.7731629392971247, + "grad_norm": 0.4384968778126525, + "learning_rate": 3.72173546091162e-05, + "loss": 0.2114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20780757069587708, + "step": 1110, + "valid_targets_mean": 5119.9, + "valid_targets_min": 1113 + }, + { + "epoch": 1.781150159744409, + "grad_norm": 0.469616288326366, + "learning_rate": 3.7176677063492685e-05, + "loss": 0.242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2339574694633484, + "step": 1115, + "valid_targets_mean": 4781.5, + "valid_targets_min": 1392 + }, + { + "epoch": 1.7891373801916934, + "grad_norm": 0.40034591514089823, + "learning_rate": 3.713572691854414e-05, + "loss": 0.2339, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23980014026165009, + "step": 1120, + "valid_targets_mean": 6481.1, + "valid_targets_min": 1932 + }, + { + "epoch": 1.7971246006389776, + "grad_norm": 0.4320316231238413, + "learning_rate": 3.709450482416239e-05, + "loss": 0.2419, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25469911098480225, + "step": 1125, + "valid_targets_mean": 6072.1, + "valid_targets_min": 2053 + }, + { + "epoch": 1.805111821086262, + "grad_norm": 0.4330826211238907, + "learning_rate": 3.7053011434555165e-05, + "loss": 0.2195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2246796190738678, + "step": 1130, + "valid_targets_mean": 4668.4, + "valid_targets_min": 2346 + }, + { + "epoch": 1.8130990415335462, + "grad_norm": 0.4364883586042583, + "learning_rate": 3.701124740823575e-05, + "loss": 0.2313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22576244175434113, + "step": 1135, + "valid_targets_mean": 4683.9, + "valid_targets_min": 936 + }, + { + "epoch": 1.8210862619808306, + "grad_norm": 0.48768121572116485, + "learning_rate": 3.696921340801253e-05, + "loss": 0.23, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23971682786941528, + "step": 1140, + "valid_targets_mean": 4992.1, + "valid_targets_min": 1319 + }, + { + "epoch": 1.829073482428115, + "grad_norm": 0.46689741570673715, + "learning_rate": 3.6926910100978444e-05, + "loss": 0.2209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20149345695972443, + "step": 1145, + "valid_targets_mean": 4508.4, + "valid_targets_min": 1663 + }, + { + "epoch": 1.8370607028753994, + "grad_norm": 0.4650335934315107, + "learning_rate": 3.688433815850041e-05, + "loss": 0.2246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23571673035621643, + "step": 1150, + "valid_targets_mean": 4445.2, + "valid_targets_min": 942 + }, + { + "epoch": 1.8450479233226837, + "grad_norm": 0.49587787282658763, + "learning_rate": 3.68414982562087e-05, + "loss": 0.2345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25807803869247437, + "step": 1155, + "valid_targets_mean": 4298.6, + "valid_targets_min": 1095 + }, + { + "epoch": 1.8530351437699681, + "grad_norm": 0.4711400556625992, + "learning_rate": 3.679839107398618e-05, + "loss": 0.2073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22391392290592194, + "step": 1160, + "valid_targets_mean": 4605.8, + "valid_targets_min": 1079 + }, + { + "epoch": 1.8610223642172525, + "grad_norm": 0.4952899600117989, + "learning_rate": 3.6755017295957536e-05, + "loss": 0.2273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2246803343296051, + "step": 1165, + "valid_targets_mean": 4714.9, + "valid_targets_min": 1269 + }, + { + "epoch": 1.8690095846645367, + "grad_norm": 0.4505280090095968, + "learning_rate": 3.67113776104784e-05, + "loss": 0.2324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25178074836730957, + "step": 1170, + "valid_targets_mean": 5578.4, + "valid_targets_min": 1360 + }, + { + "epoch": 1.876996805111821, + "grad_norm": 0.45568302444103076, + "learning_rate": 3.6667472710124475e-05, + "loss": 0.2292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22000402212142944, + "step": 1175, + "valid_targets_mean": 6081.1, + "valid_targets_min": 1049 + }, + { + "epoch": 1.8849840255591053, + "grad_norm": 0.521807718623954, + "learning_rate": 3.6623303291680465e-05, + "loss": 0.2214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2112407088279724, + "step": 1180, + "valid_targets_mean": 3739.1, + "valid_targets_min": 483 + }, + { + "epoch": 1.8929712460063897, + "grad_norm": 0.4625676303413823, + "learning_rate": 3.6578870056129086e-05, + "loss": 0.2522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22916141152381897, + "step": 1185, + "valid_targets_mean": 4772.2, + "valid_targets_min": 2216 + }, + { + "epoch": 1.900958466453674, + "grad_norm": 0.4563568093746638, + "learning_rate": 3.653417370863992e-05, + "loss": 0.2389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23207005858421326, + "step": 1190, + "valid_targets_mean": 4272.0, + "valid_targets_min": 1679 + }, + { + "epoch": 1.9089456869009584, + "grad_norm": 0.4874312100681554, + "learning_rate": 3.6489214958558206e-05, + "loss": 0.2092, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21660256385803223, + "step": 1195, + "valid_targets_mean": 4120.2, + "valid_targets_min": 1090 + }, + { + "epoch": 1.9169329073482428, + "grad_norm": 0.42785031525017525, + "learning_rate": 3.644399451939358e-05, + "loss": 0.2256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21264520287513733, + "step": 1200, + "valid_targets_mean": 4690.1, + "valid_targets_min": 1768 + }, + { + "epoch": 1.9249201277955272, + "grad_norm": 0.43725812400863223, + "learning_rate": 3.639851310880881e-05, + "loss": 0.2355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24090486764907837, + "step": 1205, + "valid_targets_mean": 4846.9, + "valid_targets_min": 2437 + }, + { + "epoch": 1.9329073482428116, + "grad_norm": 0.4716273434408524, + "learning_rate": 3.635277144860834e-05, + "loss": 0.2362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24312490224838257, + "step": 1210, + "valid_targets_mean": 6083.0, + "valid_targets_min": 2349 + }, + { + "epoch": 1.9408945686900958, + "grad_norm": 0.40108176875471574, + "learning_rate": 3.630677026472684e-05, + "loss": 0.2285, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21307267248630524, + "step": 1215, + "valid_targets_mean": 5714.7, + "valid_targets_min": 2174 + }, + { + "epoch": 1.9488817891373802, + "grad_norm": 0.49671707842355, + "learning_rate": 3.626051028721773e-05, + "loss": 0.2248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2293505221605301, + "step": 1220, + "valid_targets_mean": 3788.6, + "valid_targets_min": 543 + }, + { + "epoch": 1.9568690095846646, + "grad_norm": 0.44107663935637476, + "learning_rate": 3.621399225024156e-05, + "loss": 0.2232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20223724842071533, + "step": 1225, + "valid_targets_mean": 4862.7, + "valid_targets_min": 943 + }, + { + "epoch": 1.9648562300319488, + "grad_norm": 0.576803228028713, + "learning_rate": 3.616721689205436e-05, + "loss": 0.22, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21243348717689514, + "step": 1230, + "valid_targets_mean": 4010.9, + "valid_targets_min": 850 + }, + { + "epoch": 1.9728434504792332, + "grad_norm": 0.4387186194383008, + "learning_rate": 3.612018495499594e-05, + "loss": 0.2303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21914973855018616, + "step": 1235, + "valid_targets_mean": 5108.9, + "valid_targets_min": 2566 + }, + { + "epoch": 1.9808306709265175, + "grad_norm": 0.45509359256484455, + "learning_rate": 3.6072897185478074e-05, + "loss": 0.2057, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19633805751800537, + "step": 1240, + "valid_targets_mean": 5676.6, + "valid_targets_min": 2015 + }, + { + "epoch": 1.988817891373802, + "grad_norm": 0.4285410701862073, + "learning_rate": 3.6025354333972714e-05, + "loss": 0.24, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23625974357128143, + "step": 1245, + "valid_targets_mean": 5396.7, + "valid_targets_min": 2667 + }, + { + "epoch": 1.9968051118210863, + "grad_norm": 0.5719947285802028, + "learning_rate": 3.597755715500002e-05, + "loss": 0.2423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25348833203315735, + "step": 1250, + "valid_targets_mean": 3761.4, + "valid_targets_min": 1730 + }, + { + "epoch": 2.0047923322683707, + "grad_norm": 0.4535662103492767, + "learning_rate": 3.592950640711642e-05, + "loss": 0.2229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2162315994501114, + "step": 1255, + "valid_targets_mean": 3989.9, + "valid_targets_min": 907 + }, + { + "epoch": 2.012779552715655, + "grad_norm": 0.4410351357350173, + "learning_rate": 3.5881202852902543e-05, + "loss": 0.2057, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20752325654029846, + "step": 1260, + "valid_targets_mean": 5998.4, + "valid_targets_min": 2837 + }, + { + "epoch": 2.0207667731629395, + "grad_norm": 0.47898384165843794, + "learning_rate": 3.583264725895117e-05, + "loss": 0.2162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20195890963077545, + "step": 1265, + "valid_targets_mean": 4670.1, + "valid_targets_min": 1350 + }, + { + "epoch": 2.0287539936102235, + "grad_norm": 0.5515945007484138, + "learning_rate": 3.5783840395854984e-05, + "loss": 0.205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19788508117198944, + "step": 1270, + "valid_targets_mean": 5088.9, + "valid_targets_min": 1475 + }, + { + "epoch": 2.036741214057508, + "grad_norm": 0.5431215978248436, + "learning_rate": 3.5734783038194446e-05, + "loss": 0.2078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24707621335983276, + "step": 1275, + "valid_targets_mean": 4470.7, + "valid_targets_min": 572 + }, + { + "epoch": 2.0447284345047922, + "grad_norm": 0.6459598360479897, + "learning_rate": 3.5685475964525396e-05, + "loss": 0.2232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2134454846382141, + "step": 1280, + "valid_targets_mean": 4424.7, + "valid_targets_min": 1468 + }, + { + "epoch": 2.0527156549520766, + "grad_norm": 0.47235874120995064, + "learning_rate": 3.5635919957366775e-05, + "loss": 0.2059, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17809467017650604, + "step": 1285, + "valid_targets_mean": 4389.7, + "valid_targets_min": 634 + }, + { + "epoch": 2.060702875399361, + "grad_norm": 0.43600370251565135, + "learning_rate": 3.558611580318817e-05, + "loss": 0.2093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19783927500247955, + "step": 1290, + "valid_targets_mean": 5044.0, + "valid_targets_min": 2058 + }, + { + "epoch": 2.0686900958466454, + "grad_norm": 0.4309467855687483, + "learning_rate": 3.5536064292397334e-05, + "loss": 0.2084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1924149990081787, + "step": 1295, + "valid_targets_mean": 5567.2, + "valid_targets_min": 2060 + }, + { + "epoch": 2.07667731629393, + "grad_norm": 0.4579013316960597, + "learning_rate": 3.548576621932766e-05, + "loss": 0.2062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20921730995178223, + "step": 1300, + "valid_targets_mean": 5577.4, + "valid_targets_min": 889 + }, + { + "epoch": 2.084664536741214, + "grad_norm": 0.4435730045136153, + "learning_rate": 3.543522238222555e-05, + "loss": 0.2119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18302898108959198, + "step": 1305, + "valid_targets_mean": 5364.7, + "valid_targets_min": 2309 + }, + { + "epoch": 2.0926517571884986, + "grad_norm": 0.4845583718038434, + "learning_rate": 3.538443358323777e-05, + "loss": 0.2076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2209482342004776, + "step": 1310, + "valid_targets_mean": 4514.1, + "valid_targets_min": 1366 + }, + { + "epoch": 2.1006389776357826, + "grad_norm": 0.596915693931467, + "learning_rate": 3.5333400628398696e-05, + "loss": 0.199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1920449137687683, + "step": 1315, + "valid_targets_mean": 5001.5, + "valid_targets_min": 941 + }, + { + "epoch": 2.108626198083067, + "grad_norm": 0.4537412759793582, + "learning_rate": 3.5282124327617556e-05, + "loss": 0.2168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23330473899841309, + "step": 1320, + "valid_targets_mean": 4856.5, + "valid_targets_min": 851 + }, + { + "epoch": 2.1166134185303513, + "grad_norm": 0.5170397096645349, + "learning_rate": 3.5230605494665535e-05, + "loss": 0.2095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24425125122070312, + "step": 1325, + "valid_targets_mean": 5382.1, + "valid_targets_min": 1194 + }, + { + "epoch": 2.1246006389776357, + "grad_norm": 0.4546000330875769, + "learning_rate": 3.517884494716289e-05, + "loss": 0.2061, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19844010472297668, + "step": 1330, + "valid_targets_mean": 4694.7, + "valid_targets_min": 2177 + }, + { + "epoch": 2.13258785942492, + "grad_norm": 0.4666137402009551, + "learning_rate": 3.512684350656596e-05, + "loss": 0.207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22991979122161865, + "step": 1335, + "valid_targets_mean": 4580.0, + "valid_targets_min": 1281 + }, + { + "epoch": 2.1405750798722045, + "grad_norm": 0.43841659587073956, + "learning_rate": 3.507460199815414e-05, + "loss": 0.2329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23546649515628815, + "step": 1340, + "valid_targets_mean": 5230.1, + "valid_targets_min": 2442 + }, + { + "epoch": 2.148562300319489, + "grad_norm": 0.43409838130564055, + "learning_rate": 3.5022121251016786e-05, + "loss": 0.2084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20348697900772095, + "step": 1345, + "valid_targets_mean": 5317.4, + "valid_targets_min": 1682 + }, + { + "epoch": 2.1565495207667733, + "grad_norm": 0.44719714225413865, + "learning_rate": 3.4969402098040015e-05, + "loss": 0.2087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17266574501991272, + "step": 1350, + "valid_targets_mean": 4226.5, + "valid_targets_min": 659 + }, + { + "epoch": 2.1645367412140577, + "grad_norm": 0.4185009785089201, + "learning_rate": 3.491644537589356e-05, + "loss": 0.2182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20402270555496216, + "step": 1355, + "valid_targets_mean": 5367.0, + "valid_targets_min": 1574 + }, + { + "epoch": 2.1725239616613417, + "grad_norm": 0.4300283341003427, + "learning_rate": 3.486325192501743e-05, + "loss": 0.2023, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19802656769752502, + "step": 1360, + "valid_targets_mean": 4707.4, + "valid_targets_min": 1393 + }, + { + "epoch": 2.180511182108626, + "grad_norm": 0.4244014902369462, + "learning_rate": 3.480982258960859e-05, + "loss": 0.2025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18263283371925354, + "step": 1365, + "valid_targets_mean": 4805.6, + "valid_targets_min": 1855 + }, + { + "epoch": 2.1884984025559104, + "grad_norm": 0.4259899136783193, + "learning_rate": 3.47561582176076e-05, + "loss": 0.2063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20305916666984558, + "step": 1370, + "valid_targets_mean": 5255.1, + "valid_targets_min": 2441 + }, + { + "epoch": 2.196485623003195, + "grad_norm": 0.4774516932266148, + "learning_rate": 3.470225966068507e-05, + "loss": 0.2082, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2115797996520996, + "step": 1375, + "valid_targets_mean": 5119.3, + "valid_targets_min": 835 + }, + { + "epoch": 2.2044728434504792, + "grad_norm": 0.4548863057712348, + "learning_rate": 3.464812777422826e-05, + "loss": 0.2188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1970842033624649, + "step": 1380, + "valid_targets_mean": 4329.1, + "valid_targets_min": 1090 + }, + { + "epoch": 2.2124600638977636, + "grad_norm": 0.4892500528484616, + "learning_rate": 3.459376341732741e-05, + "loss": 0.1901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.220895454287529, + "step": 1385, + "valid_targets_mean": 5527.6, + "valid_targets_min": 1416 + }, + { + "epoch": 2.220447284345048, + "grad_norm": 0.489398730051602, + "learning_rate": 3.453916745276213e-05, + "loss": 0.2029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20596617460250854, + "step": 1390, + "valid_targets_mean": 4585.8, + "valid_targets_min": 587 + }, + { + "epoch": 2.2284345047923324, + "grad_norm": 0.47959886490092324, + "learning_rate": 3.4484340746987745e-05, + "loss": 0.2066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18780018389225006, + "step": 1395, + "valid_targets_mean": 4551.4, + "valid_targets_min": 2053 + }, + { + "epoch": 2.236421725239617, + "grad_norm": 0.4620077078997459, + "learning_rate": 3.4429284170121494e-05, + "loss": 0.2117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23137012124061584, + "step": 1400, + "valid_targets_mean": 5430.0, + "valid_targets_min": 1432 + }, + { + "epoch": 2.244408945686901, + "grad_norm": 0.49180601508096844, + "learning_rate": 3.4373998595928764e-05, + "loss": 0.2076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20962712168693542, + "step": 1405, + "valid_targets_mean": 4032.1, + "valid_targets_min": 1012 + }, + { + "epoch": 2.252396166134185, + "grad_norm": 0.48861738455509557, + "learning_rate": 3.431848490180919e-05, + "loss": 0.2037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21679922938346863, + "step": 1410, + "valid_targets_mean": 4853.1, + "valid_targets_min": 916 + }, + { + "epoch": 2.2603833865814695, + "grad_norm": 0.408604716929785, + "learning_rate": 3.4262743968782736e-05, + "loss": 0.201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16209134459495544, + "step": 1415, + "valid_targets_mean": 4628.4, + "valid_targets_min": 2578 + }, + { + "epoch": 2.268370607028754, + "grad_norm": 0.43752774127215555, + "learning_rate": 3.4206776681475737e-05, + "loss": 0.2193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24136966466903687, + "step": 1420, + "valid_targets_mean": 5033.4, + "valid_targets_min": 909 + }, + { + "epoch": 2.2763578274760383, + "grad_norm": 0.43560432902877066, + "learning_rate": 3.415058392810682e-05, + "loss": 0.2027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.198669895529747, + "step": 1425, + "valid_targets_mean": 4891.9, + "valid_targets_min": 784 + }, + { + "epoch": 2.2843450479233227, + "grad_norm": 0.580378603983258, + "learning_rate": 3.409416660047286e-05, + "loss": 0.2174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21435877680778503, + "step": 1430, + "valid_targets_mean": 4078.2, + "valid_targets_min": 1318 + }, + { + "epoch": 2.292332268370607, + "grad_norm": 0.40700558817808163, + "learning_rate": 3.403752559393477e-05, + "loss": 0.2027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18513426184654236, + "step": 1435, + "valid_targets_mean": 5148.9, + "valid_targets_min": 1067 + }, + { + "epoch": 2.3003194888178915, + "grad_norm": 0.4799030454563342, + "learning_rate": 3.398066180740332e-05, + "loss": 0.2173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22550812363624573, + "step": 1440, + "valid_targets_mean": 5266.4, + "valid_targets_min": 1313 + }, + { + "epoch": 2.308306709265176, + "grad_norm": 0.4974331710461038, + "learning_rate": 3.39235761433249e-05, + "loss": 0.2133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2103099226951599, + "step": 1445, + "valid_targets_mean": 4404.1, + "valid_targets_min": 2121 + }, + { + "epoch": 2.31629392971246, + "grad_norm": 0.4340003337295617, + "learning_rate": 3.386626950766713e-05, + "loss": 0.2111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20411810278892517, + "step": 1450, + "valid_targets_mean": 4893.8, + "valid_targets_min": 1976 + }, + { + "epoch": 2.3242811501597442, + "grad_norm": 0.4577279718543424, + "learning_rate": 3.380874280990454e-05, + "loss": 0.1954, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1925685852766037, + "step": 1455, + "valid_targets_mean": 4346.2, + "valid_targets_min": 682 + }, + { + "epoch": 2.3322683706070286, + "grad_norm": 0.43413317097279497, + "learning_rate": 3.37509969630041e-05, + "loss": 0.207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21768368780612946, + "step": 1460, + "valid_targets_mean": 5565.8, + "valid_targets_min": 2098 + }, + { + "epoch": 2.340255591054313, + "grad_norm": 0.4536871654290314, + "learning_rate": 3.369303288341078e-05, + "loss": 0.2123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1936158984899521, + "step": 1465, + "valid_targets_mean": 4365.2, + "valid_targets_min": 1036 + }, + { + "epoch": 2.3482428115015974, + "grad_norm": 0.4567310654580342, + "learning_rate": 3.363485149103292e-05, + "loss": 0.2183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25850218534469604, + "step": 1470, + "valid_targets_mean": 5031.1, + "valid_targets_min": 2001 + }, + { + "epoch": 2.356230031948882, + "grad_norm": 0.5977141302333794, + "learning_rate": 3.357645370922772e-05, + "loss": 0.2017, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23247145116329193, + "step": 1475, + "valid_targets_mean": 3937.1, + "valid_targets_min": 625 + }, + { + "epoch": 2.364217252396166, + "grad_norm": 0.44895842484039244, + "learning_rate": 3.351784046478655e-05, + "loss": 0.2212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21234402060508728, + "step": 1480, + "valid_targets_mean": 5188.9, + "valid_targets_min": 819 + }, + { + "epoch": 2.3722044728434506, + "grad_norm": 0.47003250547900366, + "learning_rate": 3.345901268792022e-05, + "loss": 0.2032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19264158606529236, + "step": 1485, + "valid_targets_mean": 4191.7, + "valid_targets_min": 568 + }, + { + "epoch": 2.380191693290735, + "grad_norm": 0.44506494499803045, + "learning_rate": 3.3399971312244255e-05, + "loss": 0.2104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22497370839118958, + "step": 1490, + "valid_targets_mean": 5244.2, + "valid_targets_min": 1405 + }, + { + "epoch": 2.3881789137380194, + "grad_norm": 0.46933729486270825, + "learning_rate": 3.334071727476406e-05, + "loss": 0.2138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22563546895980835, + "step": 1495, + "valid_targets_mean": 4995.9, + "valid_targets_min": 1358 + }, + { + "epoch": 2.3961661341853033, + "grad_norm": 0.5019361637475704, + "learning_rate": 3.3281251515860035e-05, + "loss": 0.2143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23603831231594086, + "step": 1500, + "valid_targets_mean": 5936.2, + "valid_targets_min": 288 + }, + { + "epoch": 2.4041533546325877, + "grad_norm": 0.40308147209479217, + "learning_rate": 3.3221574979272676e-05, + "loss": 0.2141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20887164771556854, + "step": 1505, + "valid_targets_mean": 6166.1, + "valid_targets_min": 836 + }, + { + "epoch": 2.412140575079872, + "grad_norm": 0.4398473748615588, + "learning_rate": 3.316168861208759e-05, + "loss": 0.2071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20377591252326965, + "step": 1510, + "valid_targets_mean": 4693.8, + "valid_targets_min": 590 + }, + { + "epoch": 2.4201277955271565, + "grad_norm": 0.5309344597523309, + "learning_rate": 3.310159336472047e-05, + "loss": 0.2061, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21097645163536072, + "step": 1515, + "valid_targets_mean": 4200.9, + "valid_targets_min": 761 + }, + { + "epoch": 2.428115015974441, + "grad_norm": 0.5532178599586695, + "learning_rate": 3.304129019090198e-05, + "loss": 0.2098, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2257629632949829, + "step": 1520, + "valid_targets_mean": 4738.1, + "valid_targets_min": 968 + }, + { + "epoch": 2.4361022364217253, + "grad_norm": 0.4607360256928386, + "learning_rate": 3.298078004766267e-05, + "loss": 0.1935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17515632510185242, + "step": 1525, + "valid_targets_mean": 3853.8, + "valid_targets_min": 1100 + }, + { + "epoch": 2.4440894568690097, + "grad_norm": 0.4370375317963132, + "learning_rate": 3.292006389531774e-05, + "loss": 0.1946, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16992877423763275, + "step": 1530, + "valid_targets_mean": 4876.9, + "valid_targets_min": 1554 + }, + { + "epoch": 2.452076677316294, + "grad_norm": 0.41401464203003635, + "learning_rate": 3.2859142697451835e-05, + "loss": 0.218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21531742811203003, + "step": 1535, + "valid_targets_mean": 6467.6, + "valid_targets_min": 2188 + }, + { + "epoch": 2.460063897763578, + "grad_norm": 0.6483636089392388, + "learning_rate": 3.279801742090372e-05, + "loss": 0.2113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26298800110816956, + "step": 1540, + "valid_targets_mean": 5260.5, + "valid_targets_min": 724 + }, + { + "epoch": 2.4680511182108624, + "grad_norm": 0.4802414273434107, + "learning_rate": 3.2736689035750975e-05, + "loss": 0.2031, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20041510462760925, + "step": 1545, + "valid_targets_mean": 4248.7, + "valid_targets_min": 943 + }, + { + "epoch": 2.476038338658147, + "grad_norm": 0.4443471057730885, + "learning_rate": 3.2675158515294554e-05, + "loss": 0.2026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1807590126991272, + "step": 1550, + "valid_targets_mean": 4097.3, + "valid_targets_min": 1009 + }, + { + "epoch": 2.484025559105431, + "grad_norm": 0.4902750001832227, + "learning_rate": 3.2613426836043386e-05, + "loss": 0.1964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2025105357170105, + "step": 1555, + "valid_targets_mean": 4429.0, + "valid_targets_min": 862 + }, + { + "epoch": 2.4920127795527156, + "grad_norm": 0.4582351957066707, + "learning_rate": 3.2551494977698844e-05, + "loss": 0.1926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18224765360355377, + "step": 1560, + "valid_targets_mean": 4185.1, + "valid_targets_min": 519 + }, + { + "epoch": 2.5, + "grad_norm": 0.4254457086333195, + "learning_rate": 3.248936392313921e-05, + "loss": 0.2195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1733468770980835, + "step": 1565, + "valid_targets_mean": 4750.7, + "valid_targets_min": 1263 + }, + { + "epoch": 2.5079872204472844, + "grad_norm": 0.45224970751667104, + "learning_rate": 3.2427034658404056e-05, + "loss": 0.2269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21232850849628448, + "step": 1570, + "valid_targets_mean": 4944.6, + "valid_targets_min": 1634 + }, + { + "epoch": 2.515974440894569, + "grad_norm": 0.43711126622124813, + "learning_rate": 3.236450817267863e-05, + "loss": 0.2055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18365254998207092, + "step": 1575, + "valid_targets_mean": 4473.3, + "valid_targets_min": 721 + }, + { + "epoch": 2.523961661341853, + "grad_norm": 0.48918317077505047, + "learning_rate": 3.230178545827814e-05, + "loss": 0.2165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2108568251132965, + "step": 1580, + "valid_targets_mean": 4876.8, + "valid_targets_min": 592 + }, + { + "epoch": 2.5319488817891376, + "grad_norm": 0.3925030886140674, + "learning_rate": 3.223886751063201e-05, + "loss": 0.2179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20117145776748657, + "step": 1585, + "valid_targets_mean": 6112.6, + "valid_targets_min": 2397 + }, + { + "epoch": 2.539936102236422, + "grad_norm": 0.4505753806395508, + "learning_rate": 3.2175755328268054e-05, + "loss": 0.216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2128993570804596, + "step": 1590, + "valid_targets_mean": 5152.8, + "valid_targets_min": 1554 + }, + { + "epoch": 2.547923322683706, + "grad_norm": 0.40790132537857193, + "learning_rate": 3.2112449912796675e-05, + "loss": 0.2115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1775086373090744, + "step": 1595, + "valid_targets_mean": 5171.2, + "valid_targets_min": 1857 + }, + { + "epoch": 2.5559105431309903, + "grad_norm": 0.4318086807503156, + "learning_rate": 3.204895226889494e-05, + "loss": 0.1933, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19410687685012817, + "step": 1600, + "valid_targets_mean": 4738.9, + "valid_targets_min": 1355 + }, + { + "epoch": 2.5638977635782747, + "grad_norm": 0.4636942859708295, + "learning_rate": 3.1985263404290634e-05, + "loss": 0.2164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2244848757982254, + "step": 1605, + "valid_targets_mean": 5147.4, + "valid_targets_min": 2813 + }, + { + "epoch": 2.571884984025559, + "grad_norm": 0.47397849207507037, + "learning_rate": 3.19213843297463e-05, + "loss": 0.2069, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21858221292495728, + "step": 1610, + "valid_targets_mean": 4920.2, + "valid_targets_min": 1932 + }, + { + "epoch": 2.5798722044728435, + "grad_norm": 0.45197351575402667, + "learning_rate": 3.1857316059043144e-05, + "loss": 0.1974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17527048289775848, + "step": 1615, + "valid_targets_mean": 4120.8, + "valid_targets_min": 526 + }, + { + "epoch": 2.587859424920128, + "grad_norm": 0.5005051161387774, + "learning_rate": 3.1793059608964986e-05, + "loss": 0.1925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21094384789466858, + "step": 1620, + "valid_targets_mean": 5580.8, + "valid_targets_min": 1367 + }, + { + "epoch": 2.5958466453674123, + "grad_norm": 0.47871718041244976, + "learning_rate": 3.172861599928212e-05, + "loss": 0.202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17966912686824799, + "step": 1625, + "valid_targets_mean": 3921.2, + "valid_targets_min": 939 + }, + { + "epoch": 2.6038338658146962, + "grad_norm": 0.4375260791787283, + "learning_rate": 3.166398625273512e-05, + "loss": 0.2078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18762406706809998, + "step": 1630, + "valid_targets_mean": 4905.2, + "valid_targets_min": 1067 + }, + { + "epoch": 2.6118210862619806, + "grad_norm": 0.3983808594765356, + "learning_rate": 3.15991713950186e-05, + "loss": 0.2009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21603745222091675, + "step": 1635, + "valid_targets_mean": 5733.9, + "valid_targets_min": 2853 + }, + { + "epoch": 2.619808306709265, + "grad_norm": 0.45201676841219657, + "learning_rate": 3.153417245476495e-05, + "loss": 0.2186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23388880491256714, + "step": 1640, + "valid_targets_mean": 6155.7, + "valid_targets_min": 2887 + }, + { + "epoch": 2.6277955271565494, + "grad_norm": 0.46574710638041417, + "learning_rate": 3.146899046352804e-05, + "loss": 0.1929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1933613419532776, + "step": 1645, + "valid_targets_mean": 4483.0, + "valid_targets_min": 1141 + }, + { + "epoch": 2.635782747603834, + "grad_norm": 0.49115410149229843, + "learning_rate": 3.140362645576676e-05, + "loss": 0.2122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2165064811706543, + "step": 1650, + "valid_targets_mean": 4406.9, + "valid_targets_min": 632 + }, + { + "epoch": 2.643769968051118, + "grad_norm": 0.4422886145501157, + "learning_rate": 3.133808146882871e-05, + "loss": 0.196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1859450340270996, + "step": 1655, + "valid_targets_mean": 4923.4, + "valid_targets_min": 729 + }, + { + "epoch": 2.6517571884984026, + "grad_norm": 0.5105266767260843, + "learning_rate": 3.1272356542933654e-05, + "loss": 0.1918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21490055322647095, + "step": 1660, + "valid_targets_mean": 5973.7, + "valid_targets_min": 3336 + }, + { + "epoch": 2.659744408945687, + "grad_norm": 0.6953455819856134, + "learning_rate": 3.120645272115707e-05, + "loss": 0.2086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18827220797538757, + "step": 1665, + "valid_targets_mean": 4309.4, + "valid_targets_min": 1276 + }, + { + "epoch": 2.6677316293929714, + "grad_norm": 0.5154053185300087, + "learning_rate": 3.114037104941355e-05, + "loss": 0.1944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19753989577293396, + "step": 1670, + "valid_targets_mean": 4340.2, + "valid_targets_min": 1424 + }, + { + "epoch": 2.6757188498402558, + "grad_norm": 0.4782161700404796, + "learning_rate": 3.1074112576440236e-05, + "loss": 0.2027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.175480455160141, + "step": 1675, + "valid_targets_mean": 3959.3, + "valid_targets_min": 680 + }, + { + "epoch": 2.68370607028754, + "grad_norm": 0.4453801116013753, + "learning_rate": 3.1007678353780154e-05, + "loss": 0.2154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20716232061386108, + "step": 1680, + "valid_targets_mean": 5072.2, + "valid_targets_min": 1962 + }, + { + "epoch": 2.6916932907348246, + "grad_norm": 0.42387415100705655, + "learning_rate": 3.094106943576553e-05, + "loss": 0.2021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20682209730148315, + "step": 1685, + "valid_targets_mean": 5182.8, + "valid_targets_min": 2198 + }, + { + "epoch": 2.6996805111821085, + "grad_norm": 0.8505431396696655, + "learning_rate": 3.087428687950108e-05, + "loss": 0.1886, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1699068248271942, + "step": 1690, + "valid_targets_mean": 5186.1, + "valid_targets_min": 1578 + }, + { + "epoch": 2.707667731629393, + "grad_norm": 0.42454000480579385, + "learning_rate": 3.080733174484719e-05, + "loss": 0.202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22431977093219757, + "step": 1695, + "valid_targets_mean": 5460.5, + "valid_targets_min": 1251 + }, + { + "epoch": 2.7156549520766773, + "grad_norm": 0.515113513985079, + "learning_rate": 3.074020509440313e-05, + "loss": 0.2024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18955373764038086, + "step": 1700, + "valid_targets_mean": 4786.7, + "valid_targets_min": 1748 + }, + { + "epoch": 2.7236421725239617, + "grad_norm": 0.4441426398491621, + "learning_rate": 3.0672907993490183e-05, + "loss": 0.2096, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19987380504608154, + "step": 1705, + "valid_targets_mean": 5302.4, + "valid_targets_min": 1298 + }, + { + "epoch": 2.731629392971246, + "grad_norm": 0.5688162028968159, + "learning_rate": 3.060544151013473e-05, + "loss": 0.2023, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20857849717140198, + "step": 1710, + "valid_targets_mean": 4687.8, + "valid_targets_min": 1559 + }, + { + "epoch": 2.7396166134185305, + "grad_norm": 0.4557968966348482, + "learning_rate": 3.053780671505132e-05, + "loss": 0.2105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19488662481307983, + "step": 1715, + "valid_targets_mean": 4964.9, + "valid_targets_min": 2075 + }, + { + "epoch": 2.747603833865815, + "grad_norm": 0.4645476414446028, + "learning_rate": 3.047000468162563e-05, + "loss": 0.2087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19949454069137573, + "step": 1720, + "valid_targets_mean": 4732.9, + "valid_targets_min": 543 + }, + { + "epoch": 2.755591054313099, + "grad_norm": 0.3966169427070985, + "learning_rate": 3.0402036485897496e-05, + "loss": 0.1972, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1724996268749237, + "step": 1725, + "valid_targets_mean": 5957.0, + "valid_targets_min": 2236 + }, + { + "epoch": 2.763578274760383, + "grad_norm": 0.46798109501484725, + "learning_rate": 3.0333903206543786e-05, + "loss": 0.2059, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2168780416250229, + "step": 1730, + "valid_targets_mean": 4231.0, + "valid_targets_min": 1070 + }, + { + "epoch": 2.7715654952076676, + "grad_norm": 0.4214469481286826, + "learning_rate": 3.02656059248613e-05, + "loss": 0.2054, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19142383337020874, + "step": 1735, + "valid_targets_mean": 5115.2, + "valid_targets_min": 665 + }, + { + "epoch": 2.779552715654952, + "grad_norm": 0.4303775537672929, + "learning_rate": 3.0197145724749615e-05, + "loss": 0.2029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16952644288539886, + "step": 1740, + "valid_targets_mean": 4943.2, + "valid_targets_min": 983 + }, + { + "epoch": 2.7875399361022364, + "grad_norm": 0.529210583397824, + "learning_rate": 3.0128523692693865e-05, + "loss": 0.2129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21570533514022827, + "step": 1745, + "valid_targets_mean": 5128.9, + "valid_targets_min": 1272 + }, + { + "epoch": 2.7955271565495208, + "grad_norm": 0.4875526018314679, + "learning_rate": 3.005974091774751e-05, + "loss": 0.2108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20090654492378235, + "step": 1750, + "valid_targets_mean": 5847.1, + "valid_targets_min": 3140 + }, + { + "epoch": 2.803514376996805, + "grad_norm": 0.49510928662774695, + "learning_rate": 2.999079849151505e-05, + "loss": 0.2039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1958474963903427, + "step": 1755, + "valid_targets_mean": 4237.8, + "valid_targets_min": 1373 + }, + { + "epoch": 2.8115015974440896, + "grad_norm": 0.44073636843613645, + "learning_rate": 2.992169750813471e-05, + "loss": 0.2125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22938530147075653, + "step": 1760, + "valid_targets_mean": 5790.8, + "valid_targets_min": 592 + }, + { + "epoch": 2.819488817891374, + "grad_norm": 0.48287316764874594, + "learning_rate": 2.985243906426106e-05, + "loss": 0.1985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18632280826568604, + "step": 1765, + "valid_targets_mean": 4863.8, + "valid_targets_min": 1510 + }, + { + "epoch": 2.8274760383386583, + "grad_norm": 0.4345674430560167, + "learning_rate": 2.9783024259047617e-05, + "loss": 0.2004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19696888327598572, + "step": 1770, + "valid_targets_mean": 5276.3, + "valid_targets_min": 1217 + }, + { + "epoch": 2.8354632587859427, + "grad_norm": 0.4842821725079989, + "learning_rate": 2.971345419412941e-05, + "loss": 0.1918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18247222900390625, + "step": 1775, + "valid_targets_mean": 4082.5, + "valid_targets_min": 1215 + }, + { + "epoch": 2.8434504792332267, + "grad_norm": 0.42005980553544303, + "learning_rate": 2.964372997360548e-05, + "loss": 0.189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17339880764484406, + "step": 1780, + "valid_targets_mean": 5439.2, + "valid_targets_min": 1617 + }, + { + "epoch": 2.851437699680511, + "grad_norm": 0.47263819048565253, + "learning_rate": 2.957385270402137e-05, + "loss": 0.1952, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1739785075187683, + "step": 1785, + "valid_targets_mean": 3770.3, + "valid_targets_min": 735 + }, + { + "epoch": 2.8594249201277955, + "grad_norm": 0.47326009235291333, + "learning_rate": 2.9503823494351565e-05, + "loss": 0.2067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2062799632549286, + "step": 1790, + "valid_targets_mean": 4597.7, + "valid_targets_min": 1477 + }, + { + "epoch": 2.86741214057508, + "grad_norm": 0.45549075497929276, + "learning_rate": 2.9433643455981874e-05, + "loss": 0.2079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19905966520309448, + "step": 1795, + "valid_targets_mean": 4486.9, + "valid_targets_min": 1078 + }, + { + "epoch": 2.8753993610223643, + "grad_norm": 0.41464722420988726, + "learning_rate": 2.9363313702691827e-05, + "loss": 0.2041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19828450679779053, + "step": 1800, + "valid_targets_mean": 5414.1, + "valid_targets_min": 372 + }, + { + "epoch": 2.8833865814696487, + "grad_norm": 0.4753585149002599, + "learning_rate": 2.9292835350636957e-05, + "loss": 0.214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2153431475162506, + "step": 1805, + "valid_targets_mean": 5618.3, + "valid_targets_min": 299 + }, + { + "epoch": 2.891373801916933, + "grad_norm": 0.5840186243183315, + "learning_rate": 2.922220951833111e-05, + "loss": 0.2008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23015804588794708, + "step": 1810, + "valid_targets_mean": 4671.9, + "valid_targets_min": 867 + }, + { + "epoch": 2.899361022364217, + "grad_norm": 0.4781950760792564, + "learning_rate": 2.9151437326628706e-05, + "loss": 0.2012, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19905801117420197, + "step": 1815, + "valid_targets_mean": 4259.0, + "valid_targets_min": 1696 + }, + { + "epoch": 2.9073482428115014, + "grad_norm": 0.5309793198861545, + "learning_rate": 2.908051989870692e-05, + "loss": 0.2024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20656386017799377, + "step": 1820, + "valid_targets_mean": 3589.2, + "valid_targets_min": 657 + }, + { + "epoch": 2.915335463258786, + "grad_norm": 0.45103136219819573, + "learning_rate": 2.9009458360047867e-05, + "loss": 0.1986, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1953534185886383, + "step": 1825, + "valid_targets_mean": 4900.6, + "valid_targets_min": 727 + }, + { + "epoch": 2.92332268370607, + "grad_norm": 0.43991916307467194, + "learning_rate": 2.8938253838420762e-05, + "loss": 0.2008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20834563672542572, + "step": 1830, + "valid_targets_mean": 4385.7, + "valid_targets_min": 1359 + }, + { + "epoch": 2.9313099041533546, + "grad_norm": 0.5114257063223862, + "learning_rate": 2.8866907463864006e-05, + "loss": 0.2157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23517729341983795, + "step": 1835, + "valid_targets_mean": 4265.8, + "valid_targets_min": 1344 + }, + { + "epoch": 2.939297124600639, + "grad_norm": 0.6007582058907062, + "learning_rate": 2.8795420368667225e-05, + "loss": 0.2237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17349465191364288, + "step": 1840, + "valid_targets_mean": 4591.4, + "valid_targets_min": 1352 + }, + { + "epoch": 2.9472843450479234, + "grad_norm": 0.47686092522453105, + "learning_rate": 2.8723793687353354e-05, + "loss": 0.2054, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21949972212314606, + "step": 1845, + "valid_targets_mean": 4742.1, + "valid_targets_min": 737 + }, + { + "epoch": 2.9552715654952078, + "grad_norm": 0.4541073469272048, + "learning_rate": 2.8652028556660593e-05, + "loss": 0.2149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18832948803901672, + "step": 1850, + "valid_targets_mean": 5189.2, + "valid_targets_min": 644 + }, + { + "epoch": 2.963258785942492, + "grad_norm": 0.4881400662727766, + "learning_rate": 2.8580126115524373e-05, + "loss": 0.2125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1738438755273819, + "step": 1855, + "valid_targets_mean": 4233.8, + "valid_targets_min": 1245 + }, + { + "epoch": 2.9712460063897765, + "grad_norm": 0.4820316182908142, + "learning_rate": 2.8508087505059302e-05, + "loss": 0.2161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1900295913219452, + "step": 1860, + "valid_targets_mean": 4443.1, + "valid_targets_min": 870 + }, + { + "epoch": 2.979233226837061, + "grad_norm": 0.4534930639820006, + "learning_rate": 2.843591386854102e-05, + "loss": 0.2046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22404572367668152, + "step": 1865, + "valid_targets_mean": 4954.1, + "valid_targets_min": 1848 + }, + { + "epoch": 2.987220447284345, + "grad_norm": 0.5923245030683633, + "learning_rate": 2.8363606351388068e-05, + "loss": 0.2011, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20152445137500763, + "step": 1870, + "valid_targets_mean": 5321.8, + "valid_targets_min": 358 + }, + { + "epoch": 2.9952076677316293, + "grad_norm": 0.46168562051066675, + "learning_rate": 2.829116610114375e-05, + "loss": 0.2094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2535361349582672, + "step": 1875, + "valid_targets_mean": 5529.3, + "valid_targets_min": 2116 + }, + { + "epoch": 3.0031948881789137, + "grad_norm": 0.4203505928859863, + "learning_rate": 2.821859426745786e-05, + "loss": 0.1864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16350862383842468, + "step": 1880, + "valid_targets_mean": 4606.1, + "valid_targets_min": 1318 + }, + { + "epoch": 3.011182108626198, + "grad_norm": 0.4596351934581623, + "learning_rate": 2.8145892002068454e-05, + "loss": 0.193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20174235105514526, + "step": 1885, + "valid_targets_mean": 5120.6, + "valid_targets_min": 984 + }, + { + "epoch": 3.0191693290734825, + "grad_norm": 0.5067879256603599, + "learning_rate": 2.8073060458783606e-05, + "loss": 0.1917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18824288249015808, + "step": 1890, + "valid_targets_mean": 4709.0, + "valid_targets_min": 1568 + }, + { + "epoch": 3.027156549520767, + "grad_norm": 0.46646953173336714, + "learning_rate": 2.8000100793463056e-05, + "loss": 0.1786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16396231949329376, + "step": 1895, + "valid_targets_mean": 4063.9, + "valid_targets_min": 1475 + }, + { + "epoch": 3.0351437699680512, + "grad_norm": 0.5062153381404296, + "learning_rate": 2.792701416399988e-05, + "loss": 0.1844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17092962563037872, + "step": 1900, + "valid_targets_mean": 4888.4, + "valid_targets_min": 1583 + }, + { + "epoch": 3.0431309904153356, + "grad_norm": 0.4849825982152225, + "learning_rate": 2.7853801730302134e-05, + "loss": 0.179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18273237347602844, + "step": 1905, + "valid_targets_mean": 4441.9, + "valid_targets_min": 1202 + }, + { + "epoch": 3.0511182108626196, + "grad_norm": 0.4427472193966588, + "learning_rate": 2.7780464654274396e-05, + "loss": 0.1979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1674235314130783, + "step": 1910, + "valid_targets_mean": 6172.0, + "valid_targets_min": 1178 + }, + { + "epoch": 3.059105431309904, + "grad_norm": 0.47957103937687784, + "learning_rate": 2.770700409979938e-05, + "loss": 0.1863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17426815629005432, + "step": 1915, + "valid_targets_mean": 5122.2, + "valid_targets_min": 1210 + }, + { + "epoch": 3.0670926517571884, + "grad_norm": 0.4464980807866025, + "learning_rate": 2.7633421232719442e-05, + "loss": 0.1863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1803584098815918, + "step": 1920, + "valid_targets_mean": 5715.9, + "valid_targets_min": 916 + }, + { + "epoch": 3.0750798722044728, + "grad_norm": 0.447283516112869, + "learning_rate": 2.7559717220818067e-05, + "loss": 0.1863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17208212614059448, + "step": 1925, + "valid_targets_mean": 5718.5, + "valid_targets_min": 1113 + }, + { + "epoch": 3.083067092651757, + "grad_norm": 0.7231060695564152, + "learning_rate": 2.7485893233801366e-05, + "loss": 0.1932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2085496187210083, + "step": 1930, + "valid_targets_mean": 5697.9, + "valid_targets_min": 1392 + }, + { + "epoch": 3.0910543130990416, + "grad_norm": 0.43218420898922144, + "learning_rate": 2.7411950443279458e-05, + "loss": 0.1844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17368361353874207, + "step": 1935, + "valid_targets_mean": 5389.1, + "valid_targets_min": 2315 + }, + { + "epoch": 3.099041533546326, + "grad_norm": 0.48232381925589013, + "learning_rate": 2.7337890022747945e-05, + "loss": 0.1967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18132419884204865, + "step": 1940, + "valid_targets_mean": 5897.9, + "valid_targets_min": 1980 + }, + { + "epoch": 3.1070287539936103, + "grad_norm": 0.44350201563584263, + "learning_rate": 2.7263713147569243e-05, + "loss": 0.182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17033743858337402, + "step": 1945, + "valid_targets_mean": 4619.8, + "valid_targets_min": 1470 + }, + { + "epoch": 3.1150159744408947, + "grad_norm": 0.48205388274643085, + "learning_rate": 2.7189420994953945e-05, + "loss": 0.1836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19110047817230225, + "step": 1950, + "valid_targets_mean": 4604.9, + "valid_targets_min": 952 + }, + { + "epoch": 3.123003194888179, + "grad_norm": 0.4367361113266311, + "learning_rate": 2.7115014743942114e-05, + "loss": 0.186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1712811291217804, + "step": 1955, + "valid_targets_mean": 4904.7, + "valid_targets_min": 2258 + }, + { + "epoch": 3.130990415335463, + "grad_norm": 0.438479271064866, + "learning_rate": 2.704049557538461e-05, + "loss": 0.1991, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17538374662399292, + "step": 1960, + "valid_targets_mean": 5102.3, + "valid_targets_min": 1947 + }, + { + "epoch": 3.1389776357827475, + "grad_norm": 0.51415649262304, + "learning_rate": 2.696586467192434e-05, + "loss": 0.1802, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1985991895198822, + "step": 1965, + "valid_targets_mean": 3896.9, + "valid_targets_min": 917 + }, + { + "epoch": 3.146964856230032, + "grad_norm": 0.43403612119727447, + "learning_rate": 2.6891123217977448e-05, + "loss": 0.184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1899278461933136, + "step": 1970, + "valid_targets_mean": 5828.6, + "valid_targets_min": 596 + }, + { + "epoch": 3.1549520766773163, + "grad_norm": 0.4985226013639757, + "learning_rate": 2.681627239971458e-05, + "loss": 0.1963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20802980661392212, + "step": 1975, + "valid_targets_mean": 5021.2, + "valid_targets_min": 543 + }, + { + "epoch": 3.1629392971246006, + "grad_norm": 0.4771929236008774, + "learning_rate": 2.6741313405041997e-05, + "loss": 0.1926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2114463746547699, + "step": 1980, + "valid_targets_mean": 5528.2, + "valid_targets_min": 1181 + }, + { + "epoch": 3.170926517571885, + "grad_norm": 0.4423988902881132, + "learning_rate": 2.666624742358279e-05, + "loss": 0.2001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18834856152534485, + "step": 1985, + "valid_targets_mean": 5797.9, + "valid_targets_min": 2396 + }, + { + "epoch": 3.1789137380191694, + "grad_norm": 0.4743308128905031, + "learning_rate": 2.659107564665794e-05, + "loss": 0.2013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19523614645004272, + "step": 1990, + "valid_targets_mean": 5063.4, + "valid_targets_min": 2050 + }, + { + "epoch": 3.186900958466454, + "grad_norm": 0.49163208262052766, + "learning_rate": 2.6515799267267445e-05, + "loss": 0.1928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22430217266082764, + "step": 1995, + "valid_targets_mean": 4761.1, + "valid_targets_min": 1387 + }, + { + "epoch": 3.194888178913738, + "grad_norm": 0.45501689903001435, + "learning_rate": 2.6440419480071366e-05, + "loss": 0.1832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17785659432411194, + "step": 2000, + "valid_targets_mean": 4346.3, + "valid_targets_min": 1566 + }, + { + "epoch": 3.202875399361022, + "grad_norm": 0.5011874844026918, + "learning_rate": 2.6364937481370887e-05, + "loss": 0.1982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20688596367835999, + "step": 2005, + "valid_targets_mean": 4911.2, + "valid_targets_min": 634 + }, + { + "epoch": 3.2108626198083066, + "grad_norm": 0.49758161473324886, + "learning_rate": 2.628935446908933e-05, + "loss": 0.1846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19779793918132782, + "step": 2010, + "valid_targets_mean": 5070.7, + "valid_targets_min": 1535 + }, + { + "epoch": 3.218849840255591, + "grad_norm": 0.4693482728109941, + "learning_rate": 2.6213671642753124e-05, + "loss": 0.1884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17302462458610535, + "step": 2015, + "valid_targets_mean": 5099.9, + "valid_targets_min": 1001 + }, + { + "epoch": 3.2268370607028753, + "grad_norm": 1.2388840168096895, + "learning_rate": 2.6137890203472786e-05, + "loss": 0.2081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20180654525756836, + "step": 2020, + "valid_targets_mean": 4193.2, + "valid_targets_min": 713 + }, + { + "epoch": 3.2348242811501597, + "grad_norm": 0.451425368532569, + "learning_rate": 2.606201135392383e-05, + "loss": 0.1914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17804279923439026, + "step": 2025, + "valid_targets_mean": 4523.9, + "valid_targets_min": 1052 + }, + { + "epoch": 3.242811501597444, + "grad_norm": 0.47890074437923297, + "learning_rate": 2.5986036298327725e-05, + "loss": 0.1906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1896950602531433, + "step": 2030, + "valid_targets_mean": 5309.2, + "valid_targets_min": 570 + }, + { + "epoch": 3.2507987220447285, + "grad_norm": 0.47760863077509297, + "learning_rate": 2.590996624243276e-05, + "loss": 0.1991, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17177632451057434, + "step": 2035, + "valid_targets_mean": 4826.1, + "valid_targets_min": 1044 + }, + { + "epoch": 3.258785942492013, + "grad_norm": 0.44892367909412995, + "learning_rate": 2.5833802393494897e-05, + "loss": 0.1884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19597885012626648, + "step": 2040, + "valid_targets_mean": 5032.6, + "valid_targets_min": 1418 + }, + { + "epoch": 3.2667731629392973, + "grad_norm": 0.5222618703273273, + "learning_rate": 2.5757545960258638e-05, + "loss": 0.1808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16722160577774048, + "step": 2045, + "valid_targets_mean": 3837.4, + "valid_targets_min": 935 + }, + { + "epoch": 3.2747603833865817, + "grad_norm": 0.43989273642822957, + "learning_rate": 2.5681198152937804e-05, + "loss": 0.1954, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1844216287136078, + "step": 2050, + "valid_targets_mean": 4744.3, + "valid_targets_min": 1090 + }, + { + "epoch": 3.2827476038338657, + "grad_norm": 0.4442392514686647, + "learning_rate": 2.5604760183196377e-05, + "loss": 0.2047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22049251198768616, + "step": 2055, + "valid_targets_mean": 5748.8, + "valid_targets_min": 1707 + }, + { + "epoch": 3.29073482428115, + "grad_norm": 0.478703439267164, + "learning_rate": 2.5528233264129228e-05, + "loss": 0.1941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22547464072704315, + "step": 2060, + "valid_targets_mean": 5527.6, + "valid_targets_min": 827 + }, + { + "epoch": 3.2987220447284344, + "grad_norm": 0.5588579373056553, + "learning_rate": 2.54516186102429e-05, + "loss": 0.1846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15393394231796265, + "step": 2065, + "valid_targets_mean": 3522.1, + "valid_targets_min": 1510 + }, + { + "epoch": 3.306709265175719, + "grad_norm": 0.41920245325374705, + "learning_rate": 2.5374917437436294e-05, + "loss": 0.185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19683489203453064, + "step": 2070, + "valid_targets_mean": 5266.0, + "valid_targets_min": 465 + }, + { + "epoch": 3.3146964856230032, + "grad_norm": 0.48083992951050875, + "learning_rate": 2.5298130962981403e-05, + "loss": 0.1867, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24016697704792023, + "step": 2075, + "valid_targets_mean": 5231.2, + "valid_targets_min": 1409 + }, + { + "epoch": 3.3226837060702876, + "grad_norm": 0.6082496138674709, + "learning_rate": 2.5221260405503985e-05, + "loss": 0.1966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23262649774551392, + "step": 2080, + "valid_targets_mean": 3419.9, + "valid_targets_min": 1292 + }, + { + "epoch": 3.330670926517572, + "grad_norm": 0.4860773084588283, + "learning_rate": 2.5144306984964225e-05, + "loss": 0.1822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1687614917755127, + "step": 2085, + "valid_targets_mean": 3893.1, + "valid_targets_min": 776 + }, + { + "epoch": 3.3386581469648564, + "grad_norm": 0.42498160473633834, + "learning_rate": 2.5067271922637377e-05, + "loss": 0.1981, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1992039680480957, + "step": 2090, + "valid_targets_mean": 4862.2, + "valid_targets_min": 2834 + }, + { + "epoch": 3.3466453674121404, + "grad_norm": 0.4258825619968381, + "learning_rate": 2.499015644109435e-05, + "loss": 0.1923, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17836719751358032, + "step": 2095, + "valid_targets_mean": 4995.8, + "valid_targets_min": 1723 + }, + { + "epoch": 3.3546325878594248, + "grad_norm": 0.46930617839559285, + "learning_rate": 2.491296176418236e-05, + "loss": 0.1812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1799635887145996, + "step": 2100, + "valid_targets_mean": 4621.9, + "valid_targets_min": 1217 + }, + { + "epoch": 3.362619808306709, + "grad_norm": 0.6584943289712364, + "learning_rate": 2.4835689117005472e-05, + "loss": 0.1755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20586544275283813, + "step": 2105, + "valid_targets_mean": 5239.2, + "valid_targets_min": 1339 + }, + { + "epoch": 3.3706070287539935, + "grad_norm": 0.45731375171363, + "learning_rate": 2.4758339725905152e-05, + "loss": 0.1975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.199666365981102, + "step": 2110, + "valid_targets_mean": 5336.4, + "valid_targets_min": 2376 + }, + { + "epoch": 3.378594249201278, + "grad_norm": 0.43058193211917495, + "learning_rate": 2.4680914818440823e-05, + "loss": 0.1818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16980770230293274, + "step": 2115, + "valid_targets_mean": 5046.4, + "valid_targets_min": 1190 + }, + { + "epoch": 3.3865814696485623, + "grad_norm": 0.4266548230980135, + "learning_rate": 2.4603415623370387e-05, + "loss": 0.1931, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1820763200521469, + "step": 2120, + "valid_targets_mean": 5194.9, + "valid_targets_min": 896 + }, + { + "epoch": 3.3945686900958467, + "grad_norm": 0.45809211730759314, + "learning_rate": 2.4525843370630697e-05, + "loss": 0.1843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17680346965789795, + "step": 2125, + "valid_targets_mean": 4570.6, + "valid_targets_min": 792 + }, + { + "epoch": 3.402555910543131, + "grad_norm": 0.6772631764618228, + "learning_rate": 2.4448199291318058e-05, + "loss": 0.1922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1788870245218277, + "step": 2130, + "valid_targets_mean": 5122.0, + "valid_targets_min": 2157 + }, + { + "epoch": 3.4105431309904155, + "grad_norm": 0.39859839161760713, + "learning_rate": 2.4370484617668707e-05, + "loss": 0.1759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1690652221441269, + "step": 2135, + "valid_targets_mean": 5479.8, + "valid_targets_min": 1940 + }, + { + "epoch": 3.4185303514377, + "grad_norm": 0.6639191506771926, + "learning_rate": 2.4292700583039194e-05, + "loss": 0.1962, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1719520092010498, + "step": 2140, + "valid_targets_mean": 4791.0, + "valid_targets_min": 490 + }, + { + "epoch": 3.426517571884984, + "grad_norm": 0.42668237867151115, + "learning_rate": 2.4214848421886893e-05, + "loss": 0.1814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18518546223640442, + "step": 2145, + "valid_targets_mean": 5195.2, + "valid_targets_min": 1095 + }, + { + "epoch": 3.4345047923322682, + "grad_norm": 0.6674059346938613, + "learning_rate": 2.4136929369750344e-05, + "loss": 0.2049, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19148018956184387, + "step": 2150, + "valid_targets_mean": 4607.1, + "valid_targets_min": 1097 + }, + { + "epoch": 3.4424920127795526, + "grad_norm": 0.44213355246762426, + "learning_rate": 2.4058944663229672e-05, + "loss": 0.1841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19993336498737335, + "step": 2155, + "valid_targets_mean": 4826.8, + "valid_targets_min": 1777 + }, + { + "epoch": 3.450479233226837, + "grad_norm": 0.45019819902087, + "learning_rate": 2.398089553996696e-05, + "loss": 0.1899, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18793454766273499, + "step": 2160, + "valid_targets_mean": 5297.8, + "valid_targets_min": 2098 + }, + { + "epoch": 3.4584664536741214, + "grad_norm": 0.4694883248667954, + "learning_rate": 2.3902783238626604e-05, + "loss": 0.1952, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18550071120262146, + "step": 2165, + "valid_targets_mean": 4982.4, + "valid_targets_min": 2397 + }, + { + "epoch": 3.466453674121406, + "grad_norm": 0.5087812268690848, + "learning_rate": 2.3824608998875652e-05, + "loss": 0.1935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16924448311328888, + "step": 2170, + "valid_targets_mean": 4626.5, + "valid_targets_min": 813 + }, + { + "epoch": 3.47444089456869, + "grad_norm": 0.4032588568241463, + "learning_rate": 2.3746374061364138e-05, + "loss": 0.1874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15398786962032318, + "step": 2175, + "valid_targets_mean": 4960.7, + "valid_targets_min": 1709 + }, + { + "epoch": 3.4824281150159746, + "grad_norm": 0.4604507803407548, + "learning_rate": 2.3668079667705404e-05, + "loss": 0.1996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19481347501277924, + "step": 2180, + "valid_targets_mean": 4630.8, + "valid_targets_min": 1025 + }, + { + "epoch": 3.4904153354632586, + "grad_norm": 0.46373238726464927, + "learning_rate": 2.3589727060456342e-05, + "loss": 0.1847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2064315527677536, + "step": 2185, + "valid_targets_mean": 4385.4, + "valid_targets_min": 367 + }, + { + "epoch": 3.498402555910543, + "grad_norm": 0.5185491121751089, + "learning_rate": 2.3511317483097753e-05, + "loss": 0.1854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17522519826889038, + "step": 2190, + "valid_targets_mean": 4921.1, + "valid_targets_min": 543 + }, + { + "epoch": 3.5063897763578273, + "grad_norm": 0.47737053459541356, + "learning_rate": 2.3432852180014548e-05, + "loss": 0.2012, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19549396634101868, + "step": 2195, + "valid_targets_mean": 5227.9, + "valid_targets_min": 1785 + }, + { + "epoch": 3.5143769968051117, + "grad_norm": 0.9355774311363018, + "learning_rate": 2.3354332396476026e-05, + "loss": 0.1885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17945951223373413, + "step": 2200, + "valid_targets_mean": 4325.2, + "valid_targets_min": 625 + }, + { + "epoch": 3.522364217252396, + "grad_norm": 0.40783334210073574, + "learning_rate": 2.327575937861612e-05, + "loss": 0.1773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16562104225158691, + "step": 2205, + "valid_targets_mean": 5723.5, + "valid_targets_min": 1929 + }, + { + "epoch": 3.5303514376996805, + "grad_norm": 0.4527972615076226, + "learning_rate": 2.3197134373413595e-05, + "loss": 0.1892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22815603017807007, + "step": 2210, + "valid_targets_mean": 5091.8, + "valid_targets_min": 838 + }, + { + "epoch": 3.538338658146965, + "grad_norm": 0.47388983182379485, + "learning_rate": 2.3118458628672272e-05, + "loss": 0.1917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18909385800361633, + "step": 2215, + "valid_targets_mean": 4816.7, + "valid_targets_min": 1347 + }, + { + "epoch": 3.5463258785942493, + "grad_norm": 0.4250838131885362, + "learning_rate": 2.3039733393001233e-05, + "loss": 0.1904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2078056037425995, + "step": 2220, + "valid_targets_mean": 6035.9, + "valid_targets_min": 1642 + }, + { + "epoch": 3.5543130990415337, + "grad_norm": 0.4913873201029617, + "learning_rate": 2.2960959915794998e-05, + "loss": 0.1842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19340744614601135, + "step": 2225, + "valid_targets_mean": 4638.5, + "valid_targets_min": 643 + }, + { + "epoch": 3.562300319488818, + "grad_norm": 0.40839037061724476, + "learning_rate": 2.2882139447213673e-05, + "loss": 0.1782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16877365112304688, + "step": 2230, + "valid_targets_mean": 5349.2, + "valid_targets_min": 1593 + }, + { + "epoch": 3.5702875399361025, + "grad_norm": 0.4675729076472275, + "learning_rate": 2.280327323816317e-05, + "loss": 0.2037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.223651722073555, + "step": 2235, + "valid_targets_mean": 4993.3, + "valid_targets_min": 1785 + }, + { + "epoch": 3.5782747603833864, + "grad_norm": 0.5048609910600323, + "learning_rate": 2.2724362540275288e-05, + "loss": 0.1899, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19978858530521393, + "step": 2240, + "valid_targets_mean": 3995.2, + "valid_targets_min": 1343 + }, + { + "epoch": 3.586261980830671, + "grad_norm": 1.0705437953436956, + "learning_rate": 2.2645408605887882e-05, + "loss": 0.2024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2068878561258316, + "step": 2245, + "valid_targets_mean": 4488.3, + "valid_targets_min": 860 + }, + { + "epoch": 3.594249201277955, + "grad_norm": 0.46081975658418306, + "learning_rate": 2.2566412688025003e-05, + "loss": 0.1976, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17880478501319885, + "step": 2250, + "valid_targets_mean": 4766.0, + "valid_targets_min": 1981 + }, + { + "epoch": 3.6022364217252396, + "grad_norm": 0.49359472491199236, + "learning_rate": 2.2487376040376958e-05, + "loss": 0.1949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1768798828125, + "step": 2255, + "valid_targets_mean": 3792.6, + "valid_targets_min": 1838 + }, + { + "epoch": 3.610223642172524, + "grad_norm": 0.4522379907468209, + "learning_rate": 2.240829991728048e-05, + "loss": 0.1927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18313640356063843, + "step": 2260, + "valid_targets_mean": 5166.9, + "valid_targets_min": 1360 + }, + { + "epoch": 3.6182108626198084, + "grad_norm": 0.4840769217866764, + "learning_rate": 2.2329185573698778e-05, + "loss": 0.196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18533888459205627, + "step": 2265, + "valid_targets_mean": 4982.4, + "valid_targets_min": 358 + }, + { + "epoch": 3.626198083067093, + "grad_norm": 0.4336567815547825, + "learning_rate": 2.225003426520163e-05, + "loss": 0.1953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18483304977416992, + "step": 2270, + "valid_targets_mean": 6042.6, + "valid_targets_min": 2980 + }, + { + "epoch": 3.6341853035143767, + "grad_norm": 0.4814668528965902, + "learning_rate": 2.2170847247945466e-05, + "loss": 0.1779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18856918811798096, + "step": 2275, + "valid_targets_mean": 4287.3, + "valid_targets_min": 2095 + }, + { + "epoch": 3.642172523961661, + "grad_norm": 0.43225765042897346, + "learning_rate": 2.209162577865342e-05, + "loss": 0.1866, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21974483132362366, + "step": 2280, + "valid_targets_mean": 5710.0, + "valid_targets_min": 772 + }, + { + "epoch": 3.6501597444089455, + "grad_norm": 0.5391791704251936, + "learning_rate": 2.201237111459539e-05, + "loss": 0.1989, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19698813557624817, + "step": 2285, + "valid_targets_mean": 4624.1, + "valid_targets_min": 707 + }, + { + "epoch": 3.65814696485623, + "grad_norm": 0.46099188882052544, + "learning_rate": 2.193308451356809e-05, + "loss": 0.1795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18069559335708618, + "step": 2290, + "valid_targets_mean": 4457.1, + "valid_targets_min": 829 + }, + { + "epoch": 3.6661341853035143, + "grad_norm": 0.4316480786621061, + "learning_rate": 2.1853767233875084e-05, + "loss": 0.1834, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2013440579175949, + "step": 2295, + "valid_targets_mean": 4825.4, + "valid_targets_min": 931 + }, + { + "epoch": 3.6741214057507987, + "grad_norm": 0.4305778870477568, + "learning_rate": 2.1774420534306812e-05, + "loss": 0.1941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17159786820411682, + "step": 2300, + "valid_targets_mean": 5305.0, + "valid_targets_min": 2340 + }, + { + "epoch": 3.682108626198083, + "grad_norm": 0.4675706015705595, + "learning_rate": 2.1695045674120615e-05, + "loss": 0.1953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1822780966758728, + "step": 2305, + "valid_targets_mean": 4895.9, + "valid_targets_min": 1010 + }, + { + "epoch": 3.6900958466453675, + "grad_norm": 0.49409976963921004, + "learning_rate": 2.1615643913020755e-05, + "loss": 0.1981, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18701043725013733, + "step": 2310, + "valid_targets_mean": 4383.5, + "valid_targets_min": 601 + }, + { + "epoch": 3.698083067092652, + "grad_norm": 0.4247423680969255, + "learning_rate": 2.1536216511138423e-05, + "loss": 0.1701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15091648697853088, + "step": 2315, + "valid_targets_mean": 4364.2, + "valid_targets_min": 663 + }, + { + "epoch": 3.7060702875399363, + "grad_norm": 0.4152343558887996, + "learning_rate": 2.145676472901174e-05, + "loss": 0.185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18406620621681213, + "step": 2320, + "valid_targets_mean": 5993.6, + "valid_targets_min": 936 + }, + { + "epoch": 3.7140575079872207, + "grad_norm": 0.4548151227454303, + "learning_rate": 2.1377289827565734e-05, + "loss": 0.1875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19774073362350464, + "step": 2325, + "valid_targets_mean": 4848.2, + "valid_targets_min": 1688 + }, + { + "epoch": 3.722044728434505, + "grad_norm": 0.5024804371842532, + "learning_rate": 2.129779306809236e-05, + "loss": 0.1845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.183456152677536, + "step": 2330, + "valid_targets_mean": 3705.8, + "valid_targets_min": 806 + }, + { + "epoch": 3.730031948881789, + "grad_norm": 0.4418380559472342, + "learning_rate": 2.121827571223046e-05, + "loss": 0.1948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18899336457252502, + "step": 2335, + "valid_targets_mean": 5399.1, + "valid_targets_min": 899 + }, + { + "epoch": 3.7380191693290734, + "grad_norm": 0.5754123014682926, + "learning_rate": 2.113873902194575e-05, + "loss": 0.1858, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1759827435016632, + "step": 2340, + "valid_targets_mean": 4815.8, + "valid_targets_min": 1535 + }, + { + "epoch": 3.746006389776358, + "grad_norm": 0.5185080250250155, + "learning_rate": 2.1059184259510786e-05, + "loss": 0.1813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17565274238586426, + "step": 2345, + "valid_targets_mean": 4331.1, + "valid_targets_min": 620 + }, + { + "epoch": 3.753993610223642, + "grad_norm": 0.4877335595382726, + "learning_rate": 2.0979612687484935e-05, + "loss": 0.1786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18762779235839844, + "step": 2350, + "valid_targets_mean": 3845.9, + "valid_targets_min": 1194 + }, + { + "epoch": 3.7619808306709266, + "grad_norm": 0.43410975598882556, + "learning_rate": 2.0900025568694334e-05, + "loss": 0.1869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15621888637542725, + "step": 2355, + "valid_targets_mean": 4890.7, + "valid_targets_min": 1382 + }, + { + "epoch": 3.769968051118211, + "grad_norm": 0.48305637781442534, + "learning_rate": 2.082042416621187e-05, + "loss": 0.2007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.210737407207489, + "step": 2360, + "valid_targets_mean": 4516.4, + "valid_targets_min": 862 + }, + { + "epoch": 3.777955271565495, + "grad_norm": 0.5022806770180145, + "learning_rate": 2.0740809743337095e-05, + "loss": 0.191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18494747579097748, + "step": 2365, + "valid_targets_mean": 4611.1, + "valid_targets_min": 680 + }, + { + "epoch": 3.7859424920127793, + "grad_norm": 0.5581835227268139, + "learning_rate": 2.0661183563576202e-05, + "loss": 0.192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2176152467727661, + "step": 2370, + "valid_targets_mean": 4084.2, + "valid_targets_min": 320 + }, + { + "epoch": 3.7939297124600637, + "grad_norm": 0.5241288006020112, + "learning_rate": 2.0581546890621986e-05, + "loss": 0.1854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18710482120513916, + "step": 2375, + "valid_targets_mean": 4911.0, + "valid_targets_min": 732 + }, + { + "epoch": 3.801916932907348, + "grad_norm": 0.43506359784608306, + "learning_rate": 2.0501900988333755e-05, + "loss": 0.1759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17661073803901672, + "step": 2380, + "valid_targets_mean": 5269.9, + "valid_targets_min": 1674 + }, + { + "epoch": 3.8099041533546325, + "grad_norm": 0.47473570226755557, + "learning_rate": 2.0422247120717304e-05, + "loss": 0.1925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19318652153015137, + "step": 2385, + "valid_targets_mean": 4951.1, + "valid_targets_min": 1510 + }, + { + "epoch": 3.817891373801917, + "grad_norm": 0.50739056377129, + "learning_rate": 2.0342586551904824e-05, + "loss": 0.2081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20048654079437256, + "step": 2390, + "valid_targets_mean": 4932.8, + "valid_targets_min": 1456 + }, + { + "epoch": 3.8258785942492013, + "grad_norm": 0.5076070725831601, + "learning_rate": 2.026292054613486e-05, + "loss": 0.1941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21501141786575317, + "step": 2395, + "valid_targets_mean": 4507.7, + "valid_targets_min": 617 + }, + { + "epoch": 3.8338658146964857, + "grad_norm": 0.5532415716131805, + "learning_rate": 2.0183250367732253e-05, + "loss": 0.1949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20856130123138428, + "step": 2400, + "valid_targets_mean": 4624.4, + "valid_targets_min": 682 + }, + { + "epoch": 3.84185303514377, + "grad_norm": 0.45671458342233145, + "learning_rate": 2.0103577281088065e-05, + "loss": 0.184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18179446458816528, + "step": 2405, + "valid_targets_mean": 4898.2, + "valid_targets_min": 1537 + }, + { + "epoch": 3.8498402555910545, + "grad_norm": 0.4759919873634848, + "learning_rate": 2.00239025506395e-05, + "loss": 0.1992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21135717630386353, + "step": 2410, + "valid_targets_mean": 5055.8, + "valid_targets_min": 1394 + }, + { + "epoch": 3.857827476038339, + "grad_norm": 0.46319188180737886, + "learning_rate": 1.9944227440849856e-05, + "loss": 0.1801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16708174347877502, + "step": 2415, + "valid_targets_mean": 4287.0, + "valid_targets_min": 1070 + }, + { + "epoch": 3.8658146964856233, + "grad_norm": 0.5031704543649632, + "learning_rate": 1.9864553216188454e-05, + "loss": 0.1878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21082769334316254, + "step": 2420, + "valid_targets_mean": 4561.1, + "valid_targets_min": 996 + }, + { + "epoch": 3.873801916932907, + "grad_norm": 0.5225932563691561, + "learning_rate": 1.978488114111057e-05, + "loss": 0.1711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18447518348693848, + "step": 2425, + "valid_targets_mean": 4778.2, + "valid_targets_min": 1629 + }, + { + "epoch": 3.8817891373801916, + "grad_norm": 0.4478785968060911, + "learning_rate": 1.970521248003735e-05, + "loss": 0.1719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18033427000045776, + "step": 2430, + "valid_targets_mean": 5120.3, + "valid_targets_min": 721 + }, + { + "epoch": 3.889776357827476, + "grad_norm": 0.44839878262804944, + "learning_rate": 1.9625548497335783e-05, + "loss": 0.1894, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2015233039855957, + "step": 2435, + "valid_targets_mean": 5338.6, + "valid_targets_min": 2090 + }, + { + "epoch": 3.8977635782747604, + "grad_norm": 0.5033175049463399, + "learning_rate": 1.9545890457298592e-05, + "loss": 0.1763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17985785007476807, + "step": 2440, + "valid_targets_mean": 4605.1, + "valid_targets_min": 854 + }, + { + "epoch": 3.905750798722045, + "grad_norm": 0.6939870310820266, + "learning_rate": 1.9466239624124203e-05, + "loss": 0.1811, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19669967889785767, + "step": 2445, + "valid_targets_mean": 4070.8, + "valid_targets_min": 933 + }, + { + "epoch": 3.913738019169329, + "grad_norm": 0.43504461484987295, + "learning_rate": 1.938659726189663e-05, + "loss": 0.1897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20107465982437134, + "step": 2450, + "valid_targets_mean": 5109.3, + "valid_targets_min": 1514 + }, + { + "epoch": 3.9217252396166136, + "grad_norm": 0.49934803104249825, + "learning_rate": 1.9306964634565505e-05, + "loss": 0.1857, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19940102100372314, + "step": 2455, + "valid_targets_mean": 5095.8, + "valid_targets_min": 1756 + }, + { + "epoch": 3.9297124600638975, + "grad_norm": 0.3953376883568536, + "learning_rate": 1.9227343005925926e-05, + "loss": 0.1871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17035652697086334, + "step": 2460, + "valid_targets_mean": 5663.9, + "valid_targets_min": 1635 + }, + { + "epoch": 3.937699680511182, + "grad_norm": 0.4369568213838696, + "learning_rate": 1.914773363959845e-05, + "loss": 0.1808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19834986329078674, + "step": 2465, + "valid_targets_mean": 5365.0, + "valid_targets_min": 1859 + }, + { + "epoch": 3.9456869009584663, + "grad_norm": 0.5030102742032438, + "learning_rate": 1.906813779900903e-05, + "loss": 0.1863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1819533109664917, + "step": 2470, + "valid_targets_mean": 4127.9, + "valid_targets_min": 1521 + }, + { + "epoch": 3.9536741214057507, + "grad_norm": 0.4731427364779601, + "learning_rate": 1.8988556747368953e-05, + "loss": 0.1894, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17888499796390533, + "step": 2475, + "valid_targets_mean": 4367.8, + "valid_targets_min": 785 + }, + { + "epoch": 3.961661341853035, + "grad_norm": 0.5557284048366862, + "learning_rate": 1.890899174765481e-05, + "loss": 0.1837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18574580550193787, + "step": 2480, + "valid_targets_mean": 4279.9, + "valid_targets_min": 1718 + }, + { + "epoch": 3.9696485623003195, + "grad_norm": 0.47991964027385436, + "learning_rate": 1.8829444062588433e-05, + "loss": 0.1917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19404660165309906, + "step": 2485, + "valid_targets_mean": 4322.4, + "valid_targets_min": 857 + }, + { + "epoch": 3.977635782747604, + "grad_norm": 0.42202690145985944, + "learning_rate": 1.8749914954616882e-05, + "loss": 0.1762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14783018827438354, + "step": 2490, + "valid_targets_mean": 5029.9, + "valid_targets_min": 1786 + }, + { + "epoch": 3.9856230031948883, + "grad_norm": 0.4410364424689328, + "learning_rate": 1.867040568589236e-05, + "loss": 0.1844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15357394516468048, + "step": 2495, + "valid_targets_mean": 4057.6, + "valid_targets_min": 1562 + }, + { + "epoch": 3.9936102236421727, + "grad_norm": 0.5197355081404011, + "learning_rate": 1.859091751825225e-05, + "loss": 0.1952, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20624461770057678, + "step": 2500, + "valid_targets_mean": 4384.0, + "valid_targets_min": 611 + }, + { + "epoch": 4.001597444089457, + "grad_norm": 0.4699014479471355, + "learning_rate": 1.8511451713199038e-05, + "loss": 0.1917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20383623242378235, + "step": 2505, + "valid_targets_mean": 4910.9, + "valid_targets_min": 1243 + }, + { + "epoch": 4.0095846645367414, + "grad_norm": 0.44610459030943744, + "learning_rate": 1.843200953188031e-05, + "loss": 0.1767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1724563091993332, + "step": 2510, + "valid_targets_mean": 4851.7, + "valid_targets_min": 1271 + }, + { + "epoch": 4.017571884984026, + "grad_norm": 0.5342425830230646, + "learning_rate": 1.8352592235068735e-05, + "loss": 0.1632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17269504070281982, + "step": 2515, + "valid_targets_mean": 4625.4, + "valid_targets_min": 1260 + }, + { + "epoch": 4.02555910543131, + "grad_norm": 0.4544261008139438, + "learning_rate": 1.8273201083142056e-05, + "loss": 0.18, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15527205169200897, + "step": 2520, + "valid_targets_mean": 4469.7, + "valid_targets_min": 770 + }, + { + "epoch": 4.033546325878595, + "grad_norm": 0.45777968965714255, + "learning_rate": 1.8193837336063096e-05, + "loss": 0.1659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18351805210113525, + "step": 2525, + "valid_targets_mean": 5344.1, + "valid_targets_min": 1393 + }, + { + "epoch": 4.041533546325879, + "grad_norm": 0.48743720071330254, + "learning_rate": 1.8114502253359745e-05, + "loss": 0.1678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15608066320419312, + "step": 2530, + "valid_targets_mean": 4386.8, + "valid_targets_min": 806 + }, + { + "epoch": 4.0495207667731625, + "grad_norm": 0.5102889380748086, + "learning_rate": 1.8035197094104986e-05, + "loss": 0.1627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16816166043281555, + "step": 2535, + "valid_targets_mean": 4606.9, + "valid_targets_min": 1176 + }, + { + "epoch": 4.057507987220447, + "grad_norm": 0.49384781849437476, + "learning_rate": 1.7955923116896908e-05, + "loss": 0.183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1960548460483551, + "step": 2540, + "valid_targets_mean": 4988.2, + "valid_targets_min": 1467 + }, + { + "epoch": 4.065495207667731, + "grad_norm": 0.47524965354500476, + "learning_rate": 1.787668157983872e-05, + "loss": 0.1804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16807019710540771, + "step": 2545, + "valid_targets_mean": 5086.1, + "valid_targets_min": 667 + }, + { + "epoch": 4.073482428115016, + "grad_norm": 0.4840983111087558, + "learning_rate": 1.779747374051881e-05, + "loss": 0.1824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.159114807844162, + "step": 2550, + "valid_targets_mean": 4995.7, + "valid_targets_min": 1213 + }, + { + "epoch": 4.0814696485623, + "grad_norm": 0.45450548714319483, + "learning_rate": 1.7718300855990767e-05, + "loss": 0.1808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19937041401863098, + "step": 2555, + "valid_targets_mean": 5728.3, + "valid_targets_min": 1841 + }, + { + "epoch": 4.0894568690095845, + "grad_norm": 0.45562843993445606, + "learning_rate": 1.7639164182753434e-05, + "loss": 0.1732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1393558382987976, + "step": 2560, + "valid_targets_mean": 4458.7, + "valid_targets_min": 1285 + }, + { + "epoch": 4.097444089456869, + "grad_norm": 0.542728331932225, + "learning_rate": 1.7560064976730967e-05, + "loss": 0.1805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16445447504520416, + "step": 2565, + "valid_targets_mean": 4540.4, + "valid_targets_min": 1036 + }, + { + "epoch": 4.105431309904153, + "grad_norm": 0.44164521814861474, + "learning_rate": 1.7481004493252915e-05, + "loss": 0.183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17348837852478027, + "step": 2570, + "valid_targets_mean": 5027.4, + "valid_targets_min": 917 + }, + { + "epoch": 4.113418530351438, + "grad_norm": 0.4759337103330901, + "learning_rate": 1.740198398703428e-05, + "loss": 0.1878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19265320897102356, + "step": 2575, + "valid_targets_mean": 5355.0, + "valid_targets_min": 1435 + }, + { + "epoch": 4.121405750798722, + "grad_norm": 0.505547502504362, + "learning_rate": 1.7323004712155613e-05, + "loss": 0.1632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.152913436293602, + "step": 2580, + "valid_targets_mean": 5178.0, + "valid_targets_min": 996 + }, + { + "epoch": 4.1293929712460065, + "grad_norm": 0.48447911256072, + "learning_rate": 1.724406792204312e-05, + "loss": 0.1673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18921968340873718, + "step": 2585, + "valid_targets_mean": 5014.1, + "valid_targets_min": 1543 + }, + { + "epoch": 4.137380191693291, + "grad_norm": 0.5964352002018138, + "learning_rate": 1.7165174869448738e-05, + "loss": 0.1837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19785529375076294, + "step": 2590, + "valid_targets_mean": 4668.9, + "valid_targets_min": 2454 + }, + { + "epoch": 4.145367412140575, + "grad_norm": 0.5362905258520115, + "learning_rate": 1.7086326806430298e-05, + "loss": 0.1779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19170859456062317, + "step": 2595, + "valid_targets_mean": 4329.1, + "valid_targets_min": 1001 + }, + { + "epoch": 4.15335463258786, + "grad_norm": 0.5063383812064016, + "learning_rate": 1.7007524984331623e-05, + "loss": 0.1617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14627587795257568, + "step": 2600, + "valid_targets_mean": 4513.1, + "valid_targets_min": 1287 + }, + { + "epoch": 4.161341853035144, + "grad_norm": 0.4571185442372499, + "learning_rate": 1.692877065376268e-05, + "loss": 0.1794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20067718625068665, + "step": 2605, + "valid_targets_mean": 5342.1, + "valid_targets_min": 1884 + }, + { + "epoch": 4.169329073482428, + "grad_norm": 0.5531440170736265, + "learning_rate": 1.6850065064579724e-05, + "loss": 0.1653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1775854527950287, + "step": 2610, + "valid_targets_mean": 3792.1, + "valid_targets_min": 2240 + }, + { + "epoch": 4.177316293929713, + "grad_norm": 0.4707463377254241, + "learning_rate": 1.677140946586548e-05, + "loss": 0.1789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1646420657634735, + "step": 2615, + "valid_targets_mean": 4516.5, + "valid_targets_min": 721 + }, + { + "epoch": 4.185303514376997, + "grad_norm": 0.5068778613138127, + "learning_rate": 1.66928051059093e-05, + "loss": 0.1793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19891464710235596, + "step": 2620, + "valid_targets_mean": 4863.6, + "valid_targets_min": 2881 + }, + { + "epoch": 4.193290734824281, + "grad_norm": 0.48265152952562435, + "learning_rate": 1.6614253232187363e-05, + "loss": 0.1719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21110501885414124, + "step": 2625, + "valid_targets_mean": 5051.4, + "valid_targets_min": 912 + }, + { + "epoch": 4.201277955271565, + "grad_norm": 0.4701304397256498, + "learning_rate": 1.6535755091342876e-05, + "loss": 0.1743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16952627897262573, + "step": 2630, + "valid_targets_mean": 4840.1, + "valid_targets_min": 1343 + }, + { + "epoch": 4.2092651757188495, + "grad_norm": 0.47877634257198126, + "learning_rate": 1.6457311929166286e-05, + "loss": 0.1622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1714383214712143, + "step": 2635, + "valid_targets_mean": 4488.4, + "valid_targets_min": 657 + }, + { + "epoch": 4.217252396166134, + "grad_norm": 0.4932893039114935, + "learning_rate": 1.637892499057551e-05, + "loss": 0.1878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17138069868087769, + "step": 2640, + "valid_targets_mean": 4592.1, + "valid_targets_min": 819 + }, + { + "epoch": 4.225239616613418, + "grad_norm": 0.43679655489643965, + "learning_rate": 1.6300595519596177e-05, + "loss": 0.1628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15296795964241028, + "step": 2645, + "valid_targets_mean": 4857.1, + "valid_targets_min": 966 + }, + { + "epoch": 4.233226837060703, + "grad_norm": 0.6163573944435584, + "learning_rate": 1.6222324759341897e-05, + "loss": 0.1723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17561312019824982, + "step": 2650, + "valid_targets_mean": 4857.5, + "valid_targets_min": 813 + }, + { + "epoch": 4.241214057507987, + "grad_norm": 0.521141354455912, + "learning_rate": 1.6144113951994515e-05, + "loss": 0.1751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.145559623837471, + "step": 2655, + "valid_targets_mean": 3869.6, + "valid_targets_min": 561 + }, + { + "epoch": 4.2492012779552715, + "grad_norm": 0.4466498466808147, + "learning_rate": 1.60659643387844e-05, + "loss": 0.162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15559345483779907, + "step": 2660, + "valid_targets_mean": 5260.7, + "valid_targets_min": 643 + }, + { + "epoch": 4.257188498402556, + "grad_norm": 0.5191430163552699, + "learning_rate": 1.5987877159970765e-05, + "loss": 0.1965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23058605194091797, + "step": 2665, + "valid_targets_mean": 4721.8, + "valid_targets_min": 2121 + }, + { + "epoch": 4.26517571884984, + "grad_norm": 0.49558878513171556, + "learning_rate": 1.5909853654821947e-05, + "loss": 0.1851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23383599519729614, + "step": 2670, + "valid_targets_mean": 4648.8, + "valid_targets_min": 1276 + }, + { + "epoch": 4.273162939297125, + "grad_norm": 0.41929939973228997, + "learning_rate": 1.583189506159579e-05, + "loss": 0.1818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1661728322505951, + "step": 2675, + "valid_targets_mean": 6055.9, + "valid_targets_min": 1690 + }, + { + "epoch": 4.281150159744409, + "grad_norm": 0.4335583132231686, + "learning_rate": 1.575400261751993e-05, + "loss": 0.1765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16649028658866882, + "step": 2680, + "valid_targets_mean": 5270.9, + "valid_targets_min": 1863 + }, + { + "epoch": 4.289137380191693, + "grad_norm": 0.5033624044429718, + "learning_rate": 1.567617755877223e-05, + "loss": 0.1796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21152611076831818, + "step": 2685, + "valid_targets_mean": 4367.6, + "valid_targets_min": 596 + }, + { + "epoch": 4.297124600638978, + "grad_norm": 0.4637755962720511, + "learning_rate": 1.5598421120461106e-05, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2031022310256958, + "step": 2690, + "valid_targets_mean": 5368.4, + "valid_targets_min": 1109 + }, + { + "epoch": 4.305111821086262, + "grad_norm": 0.4756304172705233, + "learning_rate": 1.552073453660595e-05, + "loss": 0.1763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19398033618927002, + "step": 2695, + "valid_targets_mean": 5216.4, + "valid_targets_min": 2283 + }, + { + "epoch": 4.313099041533547, + "grad_norm": 0.5287810944260913, + "learning_rate": 1.5443119040117544e-05, + "loss": 0.1681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18938207626342773, + "step": 2700, + "valid_targets_mean": 3709.1, + "valid_targets_min": 1009 + }, + { + "epoch": 4.321086261980831, + "grad_norm": 0.5088421190380102, + "learning_rate": 1.536557586277849e-05, + "loss": 0.1789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.209833025932312, + "step": 2705, + "valid_targets_mean": 5181.1, + "valid_targets_min": 1443 + }, + { + "epoch": 4.329073482428115, + "grad_norm": 0.42223055697102546, + "learning_rate": 1.5288106235223655e-05, + "loss": 0.1696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13359196484088898, + "step": 2710, + "valid_targets_mean": 4941.1, + "valid_targets_min": 1419 + }, + { + "epoch": 4.3370607028754, + "grad_norm": 0.444464250701453, + "learning_rate": 1.521071138692066e-05, + "loss": 0.1606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14987829327583313, + "step": 2715, + "valid_targets_mean": 4453.1, + "valid_targets_min": 405 + }, + { + "epoch": 4.345047923322683, + "grad_norm": 0.4876366454348983, + "learning_rate": 1.5133392546150347e-05, + "loss": 0.1897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21039533615112305, + "step": 2720, + "valid_targets_mean": 4893.1, + "valid_targets_min": 817 + }, + { + "epoch": 4.353035143769968, + "grad_norm": 0.49731859469202605, + "learning_rate": 1.5056150939987303e-05, + "loss": 0.1799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19343283772468567, + "step": 2725, + "valid_targets_mean": 4593.2, + "valid_targets_min": 762 + }, + { + "epoch": 4.361022364217252, + "grad_norm": 0.4727820962488383, + "learning_rate": 1.4978987794280363e-05, + "loss": 0.1745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16634801030158997, + "step": 2730, + "valid_targets_mean": 4974.9, + "valid_targets_min": 1897 + }, + { + "epoch": 4.3690095846645365, + "grad_norm": 0.4837015825511962, + "learning_rate": 1.4901904333633179e-05, + "loss": 0.1625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1642087996006012, + "step": 2735, + "valid_targets_mean": 4487.1, + "valid_targets_min": 765 + }, + { + "epoch": 4.376996805111821, + "grad_norm": 0.4568084735081673, + "learning_rate": 1.4824901781384777e-05, + "loss": 0.1695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16266146302223206, + "step": 2740, + "valid_targets_mean": 4704.2, + "valid_targets_min": 590 + }, + { + "epoch": 4.384984025559105, + "grad_norm": 0.5317320056579059, + "learning_rate": 1.4747981359590138e-05, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17885902523994446, + "step": 2745, + "valid_targets_mean": 5585.6, + "valid_targets_min": 2346 + }, + { + "epoch": 4.39297124600639, + "grad_norm": 0.47355838191742555, + "learning_rate": 1.4671144289000806e-05, + "loss": 0.1836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1866658627986908, + "step": 2750, + "valid_targets_mean": 4586.2, + "valid_targets_min": 860 + }, + { + "epoch": 4.400958466453674, + "grad_norm": 0.4596060420334498, + "learning_rate": 1.4594391789045513e-05, + "loss": 0.181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14949265122413635, + "step": 2755, + "valid_targets_mean": 4949.5, + "valid_targets_min": 808 + }, + { + "epoch": 4.4089456869009584, + "grad_norm": 0.4902124697286356, + "learning_rate": 1.4517725077810835e-05, + "loss": 0.1803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18611708283424377, + "step": 2760, + "valid_targets_mean": 4757.1, + "valid_targets_min": 2281 + }, + { + "epoch": 4.416932907348243, + "grad_norm": 0.4397051481953006, + "learning_rate": 1.4441145372021847e-05, + "loss": 0.1683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16182464361190796, + "step": 2765, + "valid_targets_mean": 5520.4, + "valid_targets_min": 1653 + }, + { + "epoch": 4.424920127795527, + "grad_norm": 0.49132049788954635, + "learning_rate": 1.4364653887022831e-05, + "loss": 0.1669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.183456152677536, + "step": 2770, + "valid_targets_mean": 4734.7, + "valid_targets_min": 1523 + }, + { + "epoch": 4.432907348242812, + "grad_norm": 0.5022396485100658, + "learning_rate": 1.4288251836757956e-05, + "loss": 0.179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16363565623760223, + "step": 2775, + "valid_targets_mean": 4242.3, + "valid_targets_min": 1927 + }, + { + "epoch": 4.440894568690096, + "grad_norm": 0.46423648756327707, + "learning_rate": 1.4211940433752052e-05, + "loss": 0.1708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1824829876422882, + "step": 2780, + "valid_targets_mean": 5115.5, + "valid_targets_min": 747 + }, + { + "epoch": 4.44888178913738, + "grad_norm": 0.5437782791421605, + "learning_rate": 1.4135720889091342e-05, + "loss": 0.1893, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24194525182247162, + "step": 2785, + "valid_targets_mean": 3960.6, + "valid_targets_min": 665 + }, + { + "epoch": 4.456869009584665, + "grad_norm": 0.47207100051740214, + "learning_rate": 1.4059594412404227e-05, + "loss": 0.1695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15635071694850922, + "step": 2790, + "valid_targets_mean": 4844.9, + "valid_targets_min": 770 + }, + { + "epoch": 4.464856230031949, + "grad_norm": 0.7725352514241253, + "learning_rate": 1.3983562211842094e-05, + "loss": 0.1754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16280369460582733, + "step": 2795, + "valid_targets_mean": 4732.6, + "valid_targets_min": 2517 + }, + { + "epoch": 4.472843450479234, + "grad_norm": 0.556527290971323, + "learning_rate": 1.390762549406013e-05, + "loss": 0.1817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19617465138435364, + "step": 2800, + "valid_targets_mean": 4603.3, + "valid_targets_min": 896 + }, + { + "epoch": 4.480830670926517, + "grad_norm": 0.4522080581527669, + "learning_rate": 1.3831785464198188e-05, + "loss": 0.182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17426979541778564, + "step": 2805, + "valid_targets_mean": 5406.4, + "valid_targets_min": 1533 + }, + { + "epoch": 4.488817891373802, + "grad_norm": 0.4491497927742097, + "learning_rate": 1.3756043325861648e-05, + "loss": 0.1685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1735353171825409, + "step": 2810, + "valid_targets_mean": 5382.9, + "valid_targets_min": 1207 + }, + { + "epoch": 4.496805111821086, + "grad_norm": 0.5070465660484962, + "learning_rate": 1.3680400281102326e-05, + "loss": 0.1786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19638991355895996, + "step": 2815, + "valid_targets_mean": 4763.1, + "valid_targets_min": 1971 + }, + { + "epoch": 4.50479233226837, + "grad_norm": 0.4717610993795897, + "learning_rate": 1.3604857530399378e-05, + "loss": 0.1688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1694454550743103, + "step": 2820, + "valid_targets_mean": 4820.9, + "valid_targets_min": 720 + }, + { + "epoch": 4.512779552715655, + "grad_norm": 0.4228838522176023, + "learning_rate": 1.3529416272640278e-05, + "loss": 0.1693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14357447624206543, + "step": 2825, + "valid_targets_mean": 5209.4, + "valid_targets_min": 1897 + }, + { + "epoch": 4.520766773162939, + "grad_norm": 0.46720980296374576, + "learning_rate": 1.3454077705101766e-05, + "loss": 0.1739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1450195610523224, + "step": 2830, + "valid_targets_mean": 5413.9, + "valid_targets_min": 2133 + }, + { + "epoch": 4.5287539936102235, + "grad_norm": 0.4595243786600848, + "learning_rate": 1.3378843023430856e-05, + "loss": 0.1804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19309571385383606, + "step": 2835, + "valid_targets_mean": 5553.4, + "valid_targets_min": 1046 + }, + { + "epoch": 4.536741214057508, + "grad_norm": 0.4859604764765627, + "learning_rate": 1.3303713421625864e-05, + "loss": 0.1676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1785299926996231, + "step": 2840, + "valid_targets_mean": 5517.8, + "valid_targets_min": 1231 + }, + { + "epoch": 4.544728434504792, + "grad_norm": 0.513803510247055, + "learning_rate": 1.3228690092017452e-05, + "loss": 0.1747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16955965757369995, + "step": 2845, + "valid_targets_mean": 5190.0, + "valid_targets_min": 1084 + }, + { + "epoch": 4.552715654952077, + "grad_norm": 0.45254755090215193, + "learning_rate": 1.3153774225249705e-05, + "loss": 0.1803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15954682230949402, + "step": 2850, + "valid_targets_mean": 4924.6, + "valid_targets_min": 1679 + }, + { + "epoch": 4.560702875399361, + "grad_norm": 0.4588250749939126, + "learning_rate": 1.3078967010261243e-05, + "loss": 0.1828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16771024465560913, + "step": 2855, + "valid_targets_mean": 5032.6, + "valid_targets_min": 1681 + }, + { + "epoch": 4.568690095846645, + "grad_norm": 0.45471209980664273, + "learning_rate": 1.3004269634266357e-05, + "loss": 0.1841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18457409739494324, + "step": 2860, + "valid_targets_mean": 5483.9, + "valid_targets_min": 1857 + }, + { + "epoch": 4.57667731629393, + "grad_norm": 0.5381067225547658, + "learning_rate": 1.2929683282736135e-05, + "loss": 0.1756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19067108631134033, + "step": 2865, + "valid_targets_mean": 4096.4, + "valid_targets_min": 806 + }, + { + "epoch": 4.584664536741214, + "grad_norm": 0.4900463699350829, + "learning_rate": 1.285520913937969e-05, + "loss": 0.1733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1881490796804428, + "step": 2870, + "valid_targets_mean": 4364.2, + "valid_targets_min": 855 + }, + { + "epoch": 4.592651757188499, + "grad_norm": 0.4331601483409343, + "learning_rate": 1.2780848386125354e-05, + "loss": 0.1717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15843982994556427, + "step": 2875, + "valid_targets_mean": 4837.6, + "valid_targets_min": 625 + }, + { + "epoch": 4.600638977635783, + "grad_norm": 0.42410902438561765, + "learning_rate": 1.2706602203101913e-05, + "loss": 0.1728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1685314178466797, + "step": 2880, + "valid_targets_mean": 5168.2, + "valid_targets_min": 2244 + }, + { + "epoch": 4.608626198083067, + "grad_norm": 0.44351881797251413, + "learning_rate": 1.2632471768619894e-05, + "loss": 0.1756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14001521468162537, + "step": 2885, + "valid_targets_mean": 4807.2, + "valid_targets_min": 753 + }, + { + "epoch": 4.616613418530352, + "grad_norm": 0.5210509360425715, + "learning_rate": 1.255845825915286e-05, + "loss": 0.1663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.163040429353714, + "step": 2890, + "valid_targets_mean": 3828.9, + "valid_targets_min": 1267 + }, + { + "epoch": 4.624600638977636, + "grad_norm": 0.4940486964325761, + "learning_rate": 1.248456284931873e-05, + "loss": 0.1656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17665278911590576, + "step": 2895, + "valid_targets_mean": 4694.3, + "valid_targets_min": 682 + }, + { + "epoch": 4.63258785942492, + "grad_norm": 0.5312612354322576, + "learning_rate": 1.241078671186115e-05, + "loss": 0.1679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1842440664768219, + "step": 2900, + "valid_targets_mean": 4051.1, + "valid_targets_min": 553 + }, + { + "epoch": 4.640575079872205, + "grad_norm": 0.5199309221773137, + "learning_rate": 1.2337131017630873e-05, + "loss": 0.1716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1977030634880066, + "step": 2905, + "valid_targets_mean": 3997.2, + "valid_targets_min": 803 + }, + { + "epoch": 4.6485623003194885, + "grad_norm": 0.49556792854851944, + "learning_rate": 1.2263596935567175e-05, + "loss": 0.1694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17178234457969666, + "step": 2910, + "valid_targets_mean": 4453.0, + "valid_targets_min": 1163 + }, + { + "epoch": 4.656549520766773, + "grad_norm": 0.4918010969834742, + "learning_rate": 1.2190185632679316e-05, + "loss": 0.1806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2153959572315216, + "step": 2915, + "valid_targets_mean": 5196.2, + "valid_targets_min": 2456 + }, + { + "epoch": 4.664536741214057, + "grad_norm": 0.5504834449476904, + "learning_rate": 1.2116898274028008e-05, + "loss": 0.1681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17798376083374023, + "step": 2920, + "valid_targets_mean": 4049.2, + "valid_targets_min": 1668 + }, + { + "epoch": 4.672523961661342, + "grad_norm": 0.441802781277938, + "learning_rate": 1.2043736022706934e-05, + "loss": 0.1796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15885400772094727, + "step": 2925, + "valid_targets_mean": 4954.7, + "valid_targets_min": 1132 + }, + { + "epoch": 4.680511182108626, + "grad_norm": 0.5473584807237512, + "learning_rate": 1.1970700039824271e-05, + "loss": 0.1816, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21114695072174072, + "step": 2930, + "valid_targets_mean": 4388.6, + "valid_targets_min": 792 + }, + { + "epoch": 4.68849840255591, + "grad_norm": 0.4338073131012635, + "learning_rate": 1.1897791484484284e-05, + "loss": 0.1699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15443475544452667, + "step": 2935, + "valid_targets_mean": 4838.2, + "valid_targets_min": 874 + }, + { + "epoch": 4.696485623003195, + "grad_norm": 0.4625102868784867, + "learning_rate": 1.182501151376893e-05, + "loss": 0.1811, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19839084148406982, + "step": 2940, + "valid_targets_mean": 5425.4, + "valid_targets_min": 3203 + }, + { + "epoch": 4.704472843450479, + "grad_norm": 0.466180176987445, + "learning_rate": 1.1752361282719472e-05, + "loss": 0.1751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1526743769645691, + "step": 2945, + "valid_targets_mean": 4683.6, + "valid_targets_min": 1552 + }, + { + "epoch": 4.712460063897764, + "grad_norm": 0.47551837484440984, + "learning_rate": 1.1679841944318171e-05, + "loss": 0.1763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17545554041862488, + "step": 2950, + "valid_targets_mean": 4613.1, + "valid_targets_min": 420 + }, + { + "epoch": 4.720447284345048, + "grad_norm": 0.39710037175579455, + "learning_rate": 1.1607454649469991e-05, + "loss": 0.1697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.167159304022789, + "step": 2955, + "valid_targets_mean": 6828.9, + "valid_targets_min": 1557 + }, + { + "epoch": 4.728434504792332, + "grad_norm": 0.45927767227442196, + "learning_rate": 1.1535200546984291e-05, + "loss": 0.1731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13732823729515076, + "step": 2960, + "valid_targets_mean": 4008.6, + "valid_targets_min": 336 + }, + { + "epoch": 4.736421725239617, + "grad_norm": 0.4457328214905737, + "learning_rate": 1.1463080783556663e-05, + "loss": 0.1742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1593000888824463, + "step": 2965, + "valid_targets_mean": 4668.5, + "valid_targets_min": 2312 + }, + { + "epoch": 4.744408945686901, + "grad_norm": 0.45706099623559, + "learning_rate": 1.139109650375066e-05, + "loss": 0.1663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15481191873550415, + "step": 2970, + "valid_targets_mean": 4386.1, + "valid_targets_min": 557 + }, + { + "epoch": 4.752396166134186, + "grad_norm": 0.4384834971282695, + "learning_rate": 1.1319248849979705e-05, + "loss": 0.1805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17784811556339264, + "step": 2975, + "valid_targets_mean": 5961.6, + "valid_targets_min": 924 + }, + { + "epoch": 4.76038338658147, + "grad_norm": 0.4580626059711276, + "learning_rate": 1.1247538962488882e-05, + "loss": 0.1784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1837904453277588, + "step": 2980, + "valid_targets_mean": 5080.2, + "valid_targets_min": 1940 + }, + { + "epoch": 4.768370607028754, + "grad_norm": 0.4769567025754341, + "learning_rate": 1.1175967979336913e-05, + "loss": 0.1793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15448135137557983, + "step": 2985, + "valid_targets_mean": 3807.9, + "valid_targets_min": 566 + }, + { + "epoch": 4.776357827476039, + "grad_norm": 0.4453754802734512, + "learning_rate": 1.1104537036378054e-05, + "loss": 0.1654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13982991874217987, + "step": 2990, + "valid_targets_mean": 5076.4, + "valid_targets_min": 1683 + }, + { + "epoch": 4.784345047923322, + "grad_norm": 1.2063322370643315, + "learning_rate": 1.1033247267244063e-05, + "loss": 0.1715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14099296927452087, + "step": 2995, + "valid_targets_mean": 5085.6, + "valid_targets_min": 2205 + }, + { + "epoch": 4.792332268370607, + "grad_norm": 0.5668162128968549, + "learning_rate": 1.0962099803326257e-05, + "loss": 0.1744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16944657266139984, + "step": 3000, + "valid_targets_mean": 4677.1, + "valid_targets_min": 2127 + }, + { + "epoch": 4.800319488817891, + "grad_norm": 0.5039649642727122, + "learning_rate": 1.089109577375748e-05, + "loss": 0.1821, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20508737862110138, + "step": 3005, + "valid_targets_mean": 5167.5, + "valid_targets_min": 2174 + }, + { + "epoch": 4.8083067092651754, + "grad_norm": 0.46177989402877045, + "learning_rate": 1.082023630539427e-05, + "loss": 0.1807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19288091361522675, + "step": 3010, + "valid_targets_mean": 5167.2, + "valid_targets_min": 614 + }, + { + "epoch": 4.81629392971246, + "grad_norm": 0.5194814075761426, + "learning_rate": 1.07495225227989e-05, + "loss": 0.1787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20017312467098236, + "step": 3015, + "valid_targets_mean": 4127.4, + "valid_targets_min": 1210 + }, + { + "epoch": 4.824281150159744, + "grad_norm": 0.5111828873940237, + "learning_rate": 1.0678955548221595e-05, + "loss": 0.1748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16314226388931274, + "step": 3020, + "valid_targets_mean": 4261.6, + "valid_targets_min": 337 + }, + { + "epoch": 4.832268370607029, + "grad_norm": 0.48761163101608956, + "learning_rate": 1.0608536501582654e-05, + "loss": 0.1681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20200759172439575, + "step": 3025, + "valid_targets_mean": 4762.9, + "valid_targets_min": 1243 + }, + { + "epoch": 4.840255591054313, + "grad_norm": 0.4630695814574282, + "learning_rate": 1.0538266500454739e-05, + "loss": 0.1761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17577683925628662, + "step": 3030, + "valid_targets_mean": 5338.1, + "valid_targets_min": 961 + }, + { + "epoch": 4.848242811501597, + "grad_norm": 0.42697756027113803, + "learning_rate": 1.0468146660045118e-05, + "loss": 0.1736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17181923985481262, + "step": 3035, + "valid_targets_mean": 6130.8, + "valid_targets_min": 2510 + }, + { + "epoch": 4.856230031948882, + "grad_norm": 0.4518844364317005, + "learning_rate": 1.0398178093177928e-05, + "loss": 0.167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15323194861412048, + "step": 3040, + "valid_targets_mean": 4782.7, + "valid_targets_min": 940 + }, + { + "epoch": 4.864217252396166, + "grad_norm": 0.5664237237310001, + "learning_rate": 1.0328361910276592e-05, + "loss": 0.1705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16498196125030518, + "step": 3045, + "valid_targets_mean": 4808.9, + "valid_targets_min": 1623 + }, + { + "epoch": 4.872204472843451, + "grad_norm": 0.47173286112075913, + "learning_rate": 1.0258699219346091e-05, + "loss": 0.1754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17309358716011047, + "step": 3050, + "valid_targets_mean": 4583.5, + "valid_targets_min": 2051 + }, + { + "epoch": 4.880191693290735, + "grad_norm": 0.5718052290052063, + "learning_rate": 1.0189191125955503e-05, + "loss": 0.1674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17422272264957428, + "step": 3055, + "valid_targets_mean": 3853.5, + "valid_targets_min": 805 + }, + { + "epoch": 4.888178913738019, + "grad_norm": 0.4620315195605763, + "learning_rate": 1.0119838733220342e-05, + "loss": 0.1689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1558941751718521, + "step": 3060, + "valid_targets_mean": 4822.6, + "valid_targets_min": 695 + }, + { + "epoch": 4.896166134185304, + "grad_norm": 0.5131232008912642, + "learning_rate": 1.0050643141785148e-05, + "loss": 0.1805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16869670152664185, + "step": 3065, + "valid_targets_mean": 4437.9, + "valid_targets_min": 656 + }, + { + "epoch": 4.904153354632588, + "grad_norm": 0.4410859299182515, + "learning_rate": 9.981605449805933e-06, + "loss": 0.1736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18083491921424866, + "step": 3070, + "valid_targets_mean": 5510.3, + "valid_targets_min": 1818 + }, + { + "epoch": 4.912140575079873, + "grad_norm": 0.4514321375646226, + "learning_rate": 9.912726752932823e-06, + "loss": 0.1635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16155874729156494, + "step": 3075, + "valid_targets_mean": 5034.1, + "valid_targets_min": 1344 + }, + { + "epoch": 4.920127795527156, + "grad_norm": 0.4980585873144301, + "learning_rate": 9.844008144292643e-06, + "loss": 0.1929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24201665818691254, + "step": 3080, + "valid_targets_mean": 4619.4, + "valid_targets_min": 571 + }, + { + "epoch": 4.928115015974441, + "grad_norm": 0.559552867245746, + "learning_rate": 9.775450714471537e-06, + "loss": 0.181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1549963653087616, + "step": 3085, + "valid_targets_mean": 4439.6, + "valid_targets_min": 1307 + }, + { + "epoch": 4.936102236421725, + "grad_norm": 0.44659416714775546, + "learning_rate": 9.707055551497734e-06, + "loss": 0.1723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1799061894416809, + "step": 3090, + "valid_targets_mean": 5328.0, + "valid_targets_min": 1392 + }, + { + "epoch": 4.944089456869009, + "grad_norm": 0.4828672393131135, + "learning_rate": 9.638823740824177e-06, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19210804998874664, + "step": 3095, + "valid_targets_mean": 5063.3, + "valid_targets_min": 916 + }, + { + "epoch": 4.952076677316294, + "grad_norm": 0.6302157500037942, + "learning_rate": 9.570756365311418e-06, + "loss": 0.1751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19101989269256592, + "step": 3100, + "valid_targets_mean": 3066.4, + "valid_targets_min": 626 + }, + { + "epoch": 4.960063897763578, + "grad_norm": 0.4651924692921601, + "learning_rate": 9.502854505210326e-06, + "loss": 0.1764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1738903820514679, + "step": 3105, + "valid_targets_mean": 5021.1, + "valid_targets_min": 1392 + }, + { + "epoch": 4.968051118210862, + "grad_norm": 0.5080118101677551, + "learning_rate": 9.435119238145018e-06, + "loss": 0.1791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1990775465965271, + "step": 3110, + "valid_targets_mean": 5799.6, + "valid_targets_min": 1404 + }, + { + "epoch": 4.976038338658147, + "grad_norm": 0.4644747451905339, + "learning_rate": 9.367551639095704e-06, + "loss": 0.1674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14686700701713562, + "step": 3115, + "valid_targets_mean": 4626.5, + "valid_targets_min": 409 + }, + { + "epoch": 4.984025559105431, + "grad_norm": 0.42687008353899464, + "learning_rate": 9.300152780381664e-06, + "loss": 0.1719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16838842630386353, + "step": 3120, + "valid_targets_mean": 5399.3, + "valid_targets_min": 1914 + }, + { + "epoch": 4.992012779552716, + "grad_norm": 0.38161676379593085, + "learning_rate": 9.232923731644216e-06, + "loss": 0.1738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14308978617191315, + "step": 3125, + "valid_targets_mean": 5909.3, + "valid_targets_min": 2080 + }, + { + "epoch": 5.0, + "grad_norm": 0.47394287634536164, + "learning_rate": 9.16586555982972e-06, + "loss": 0.171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1663876175880432, + "step": 3130, + "valid_targets_mean": 4612.8, + "valid_targets_min": 1419 + }, + { + "epoch": 5.007987220447284, + "grad_norm": 0.45724309774968813, + "learning_rate": 9.098979329172702e-06, + "loss": 0.1589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15543916821479797, + "step": 3135, + "valid_targets_mean": 5156.6, + "valid_targets_min": 1269 + }, + { + "epoch": 5.015974440894569, + "grad_norm": 0.5375515015395808, + "learning_rate": 9.032266101178872e-06, + "loss": 0.1714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1786627471446991, + "step": 3140, + "valid_targets_mean": 4052.9, + "valid_targets_min": 409 + }, + { + "epoch": 5.023961661341853, + "grad_norm": 0.5138573644283863, + "learning_rate": 8.965726934608392e-06, + "loss": 0.1594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17196139693260193, + "step": 3145, + "valid_targets_mean": 6069.1, + "valid_targets_min": 2498 + }, + { + "epoch": 5.031948881789138, + "grad_norm": 0.43963520310638937, + "learning_rate": 8.899362885458964e-06, + "loss": 0.1724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15448343753814697, + "step": 3150, + "valid_targets_mean": 4924.6, + "valid_targets_min": 1058 + }, + { + "epoch": 5.039936102236422, + "grad_norm": 0.5289418769845741, + "learning_rate": 8.833175006949165e-06, + "loss": 0.1554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15173891186714172, + "step": 3155, + "valid_targets_mean": 3554.7, + "valid_targets_min": 714 + }, + { + "epoch": 5.047923322683706, + "grad_norm": 0.5972361696404577, + "learning_rate": 8.767164349501648e-06, + "loss": 0.1563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18872520327568054, + "step": 3160, + "valid_targets_mean": 5194.0, + "valid_targets_min": 611 + }, + { + "epoch": 5.055910543130991, + "grad_norm": 0.4522349154786805, + "learning_rate": 8.70133196072654e-06, + "loss": 0.1654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1551792174577713, + "step": 3165, + "valid_targets_mean": 5324.4, + "valid_targets_min": 1254 + }, + { + "epoch": 5.063897763578275, + "grad_norm": 0.5166382639587666, + "learning_rate": 8.63567888540478e-06, + "loss": 0.1637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1354677975177765, + "step": 3170, + "valid_targets_mean": 3944.1, + "valid_targets_min": 1318 + }, + { + "epoch": 5.0718849840255595, + "grad_norm": 0.5458370995208548, + "learning_rate": 8.570206165471535e-06, + "loss": 0.1603, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21429777145385742, + "step": 3175, + "valid_targets_mean": 4476.5, + "valid_targets_min": 1940 + }, + { + "epoch": 5.079872204472843, + "grad_norm": 0.5005297076281998, + "learning_rate": 8.504914839999691e-06, + "loss": 0.1615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14907950162887573, + "step": 3180, + "valid_targets_mean": 4650.1, + "valid_targets_min": 1352 + }, + { + "epoch": 5.087859424920127, + "grad_norm": 0.4579906641875055, + "learning_rate": 8.439805945183333e-06, + "loss": 0.161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20461249351501465, + "step": 3185, + "valid_targets_mean": 6407.1, + "valid_targets_min": 1645 + }, + { + "epoch": 5.095846645367412, + "grad_norm": 0.5348372856234672, + "learning_rate": 8.374880514321318e-06, + "loss": 0.1626, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15267446637153625, + "step": 3190, + "valid_targets_mean": 3976.1, + "valid_targets_min": 957 + }, + { + "epoch": 5.103833865814696, + "grad_norm": 0.42872203869233483, + "learning_rate": 8.310139577800864e-06, + "loss": 0.1645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15673330426216125, + "step": 3195, + "valid_targets_mean": 5642.8, + "valid_targets_min": 2409 + }, + { + "epoch": 5.111821086261981, + "grad_norm": 0.4520202286446449, + "learning_rate": 8.245584163081228e-06, + "loss": 0.1535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15110281109809875, + "step": 3200, + "valid_targets_mean": 5858.1, + "valid_targets_min": 3241 + }, + { + "epoch": 5.119808306709265, + "grad_norm": 0.4759288491846095, + "learning_rate": 8.18121529467735e-06, + "loss": 0.1579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.135267436504364, + "step": 3205, + "valid_targets_mean": 4472.4, + "valid_targets_min": 1280 + }, + { + "epoch": 5.127795527156549, + "grad_norm": 0.5449524467506791, + "learning_rate": 8.117033994143635e-06, + "loss": 0.1642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1479802131652832, + "step": 3210, + "valid_targets_mean": 4995.2, + "valid_targets_min": 739 + }, + { + "epoch": 5.135782747603834, + "grad_norm": 0.5234094996626493, + "learning_rate": 8.053041280057733e-06, + "loss": 0.1732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1646098494529724, + "step": 3215, + "valid_targets_mean": 4460.2, + "valid_targets_min": 823 + }, + { + "epoch": 5.143769968051118, + "grad_norm": 0.4668031288928073, + "learning_rate": 7.989238168004347e-06, + "loss": 0.1598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1528545469045639, + "step": 3220, + "valid_targets_mean": 4892.1, + "valid_targets_min": 1459 + }, + { + "epoch": 5.151757188498403, + "grad_norm": 0.46150987530655635, + "learning_rate": 7.925625670559158e-06, + "loss": 0.1627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16160641610622406, + "step": 3225, + "valid_targets_mean": 5333.8, + "valid_targets_min": 1736 + }, + { + "epoch": 5.159744408945687, + "grad_norm": 0.5603329278900041, + "learning_rate": 7.862204797272716e-06, + "loss": 0.1607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15708664059638977, + "step": 3230, + "valid_targets_mean": 4700.1, + "valid_targets_min": 791 + }, + { + "epoch": 5.167731629392971, + "grad_norm": 0.45539642954548165, + "learning_rate": 7.798976554654438e-06, + "loss": 0.1618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1746443808078766, + "step": 3235, + "valid_targets_mean": 5515.9, + "valid_targets_min": 1947 + }, + { + "epoch": 5.175718849840256, + "grad_norm": 0.4994398855561148, + "learning_rate": 7.73594194615663e-06, + "loss": 0.1672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1738542914390564, + "step": 3240, + "valid_targets_mean": 4582.4, + "valid_targets_min": 572 + }, + { + "epoch": 5.18370607028754, + "grad_norm": 0.5240890324025992, + "learning_rate": 7.673101972158572e-06, + "loss": 0.1552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17766988277435303, + "step": 3245, + "valid_targets_mean": 4482.9, + "valid_targets_min": 617 + }, + { + "epoch": 5.1916932907348246, + "grad_norm": 0.45091164589846017, + "learning_rate": 7.610457629950621e-06, + "loss": 0.1589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15332496166229248, + "step": 3250, + "valid_targets_mean": 5425.2, + "valid_targets_min": 936 + }, + { + "epoch": 5.199680511182109, + "grad_norm": 0.4892456016566229, + "learning_rate": 7.548009913718402e-06, + "loss": 0.165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1631796956062317, + "step": 3255, + "valid_targets_mean": 4840.7, + "valid_targets_min": 901 + }, + { + "epoch": 5.207667731629393, + "grad_norm": 0.49628523854824014, + "learning_rate": 7.485759814527034e-06, + "loss": 0.1687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15594470500946045, + "step": 3260, + "valid_targets_mean": 4835.2, + "valid_targets_min": 2353 + }, + { + "epoch": 5.215654952076678, + "grad_norm": 0.4767934545124, + "learning_rate": 7.423708320305361e-06, + "loss": 0.1548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15322643518447876, + "step": 3265, + "valid_targets_mean": 4935.9, + "valid_targets_min": 1314 + }, + { + "epoch": 5.223642172523961, + "grad_norm": 0.46585801665699067, + "learning_rate": 7.361856415830335e-06, + "loss": 0.1734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17025049030780792, + "step": 3270, + "valid_targets_mean": 5506.8, + "valid_targets_min": 942 + }, + { + "epoch": 5.231629392971246, + "grad_norm": 0.49838933432010535, + "learning_rate": 7.300205082711336e-06, + "loss": 0.1725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16799825429916382, + "step": 3275, + "valid_targets_mean": 5049.1, + "valid_targets_min": 1453 + }, + { + "epoch": 5.23961661341853, + "grad_norm": 0.4496138993305811, + "learning_rate": 7.2387552993746204e-06, + "loss": 0.165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1527540236711502, + "step": 3280, + "valid_targets_mean": 6115.8, + "valid_targets_min": 786 + }, + { + "epoch": 5.247603833865814, + "grad_norm": 0.520151851078282, + "learning_rate": 7.177508041047769e-06, + "loss": 0.1563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16873812675476074, + "step": 3285, + "valid_targets_mean": 4611.3, + "valid_targets_min": 2114 + }, + { + "epoch": 5.255591054313099, + "grad_norm": 0.6413493447918366, + "learning_rate": 7.116464279744262e-06, + "loss": 0.1806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1801944375038147, + "step": 3290, + "valid_targets_mean": 5371.3, + "valid_targets_min": 2600 + }, + { + "epoch": 5.263578274760383, + "grad_norm": 0.42212429264005713, + "learning_rate": 7.055624984247977e-06, + "loss": 0.1579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15957115590572357, + "step": 3295, + "valid_targets_mean": 5400.9, + "valid_targets_min": 896 + }, + { + "epoch": 5.271565495207668, + "grad_norm": 0.46092174693212845, + "learning_rate": 6.9949911200978805e-06, + "loss": 0.1643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1509469449520111, + "step": 3300, + "valid_targets_mean": 4963.5, + "valid_targets_min": 1635 + }, + { + "epoch": 5.279552715654952, + "grad_norm": 0.4572057037635674, + "learning_rate": 6.934563649572678e-06, + "loss": 0.1589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14917771518230438, + "step": 3305, + "valid_targets_mean": 4919.5, + "valid_targets_min": 984 + }, + { + "epoch": 5.287539936102236, + "grad_norm": 0.4623875098548041, + "learning_rate": 6.874343531675521e-06, + "loss": 0.1593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15402501821517944, + "step": 3310, + "valid_targets_mean": 4710.2, + "valid_targets_min": 1263 + }, + { + "epoch": 5.295527156549521, + "grad_norm": 0.4523223248875542, + "learning_rate": 6.814331722118837e-06, + "loss": 0.1707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18308000266551971, + "step": 3315, + "valid_targets_mean": 5763.9, + "valid_targets_min": 2312 + }, + { + "epoch": 5.303514376996805, + "grad_norm": 0.5024336853774208, + "learning_rate": 6.754529173309112e-06, + "loss": 0.159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14715887606143951, + "step": 3320, + "valid_targets_mean": 4335.7, + "valid_targets_min": 1574 + }, + { + "epoch": 5.31150159744409, + "grad_norm": 0.5485512556247358, + "learning_rate": 6.694936834331809e-06, + "loss": 0.1668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17354059219360352, + "step": 3325, + "valid_targets_mean": 4979.9, + "valid_targets_min": 1732 + }, + { + "epoch": 5.319488817891374, + "grad_norm": 0.47928325678399936, + "learning_rate": 6.635555650936278e-06, + "loss": 0.1604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17333224415779114, + "step": 3330, + "valid_targets_mean": 5068.4, + "valid_targets_min": 1320 + }, + { + "epoch": 5.327476038338658, + "grad_norm": 0.5934479215992917, + "learning_rate": 6.576386565520794e-06, + "loss": 0.1696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15551719069480896, + "step": 3335, + "valid_targets_mean": 6251.5, + "valid_targets_min": 2807 + }, + { + "epoch": 5.335463258785943, + "grad_norm": 0.48745271437862536, + "learning_rate": 6.5174305171175336e-06, + "loss": 0.1544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17440325021743774, + "step": 3340, + "valid_targets_mean": 5215.0, + "valid_targets_min": 1470 + }, + { + "epoch": 5.343450479233227, + "grad_norm": 0.4802249791337759, + "learning_rate": 6.458688441377734e-06, + "loss": 0.1698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18174077570438385, + "step": 3345, + "valid_targets_mean": 5208.0, + "valid_targets_min": 1072 + }, + { + "epoch": 5.3514376996805115, + "grad_norm": 0.4664532449254744, + "learning_rate": 6.400161270556817e-06, + "loss": 0.1556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14379502832889557, + "step": 3350, + "valid_targets_mean": 4037.0, + "valid_targets_min": 634 + }, + { + "epoch": 5.359424920127796, + "grad_norm": 0.43779060372472667, + "learning_rate": 6.341849933499573e-06, + "loss": 0.1675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1506979912519455, + "step": 3355, + "valid_targets_mean": 6327.8, + "valid_targets_min": 857 + }, + { + "epoch": 5.36741214057508, + "grad_norm": 0.49630264813010194, + "learning_rate": 6.283755355625472e-06, + "loss": 0.1664, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18854346871376038, + "step": 3360, + "valid_targets_mean": 5617.9, + "valid_targets_min": 1552 + }, + { + "epoch": 5.375399361022364, + "grad_norm": 0.5457885435607882, + "learning_rate": 6.225878458913917e-06, + "loss": 0.1578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17461910843849182, + "step": 3365, + "valid_targets_mean": 5613.8, + "valid_targets_min": 1821 + }, + { + "epoch": 5.383386581469648, + "grad_norm": 0.48035325618255165, + "learning_rate": 6.168220161889678e-06, + "loss": 0.1853, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1818755567073822, + "step": 3370, + "valid_targets_mean": 5177.7, + "valid_targets_min": 2053 + }, + { + "epoch": 5.391373801916933, + "grad_norm": 0.5405350831620226, + "learning_rate": 6.110781379608226e-06, + "loss": 0.1608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1981622874736786, + "step": 3375, + "valid_targets_mean": 4290.1, + "valid_targets_min": 433 + }, + { + "epoch": 5.399361022364217, + "grad_norm": 1.301410002327568, + "learning_rate": 6.053563023641318e-06, + "loss": 0.1679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17580270767211914, + "step": 3380, + "valid_targets_mean": 5276.1, + "valid_targets_min": 1344 + }, + { + "epoch": 5.407348242811501, + "grad_norm": 0.501406306040324, + "learning_rate": 5.996566002062439e-06, + "loss": 0.1649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14683012664318085, + "step": 3385, + "valid_targets_mean": 4519.6, + "valid_targets_min": 926 + }, + { + "epoch": 5.415335463258786, + "grad_norm": 0.5051757518619181, + "learning_rate": 5.939791219432442e-06, + "loss": 0.1816, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18291693925857544, + "step": 3390, + "valid_targets_mean": 6008.5, + "valid_targets_min": 586 + }, + { + "epoch": 5.42332268370607, + "grad_norm": 0.43841993125264633, + "learning_rate": 5.8832395767851846e-06, + "loss": 0.1565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1574784219264984, + "step": 3395, + "valid_targets_mean": 6095.5, + "valid_targets_min": 1844 + }, + { + "epoch": 5.431309904153355, + "grad_norm": 0.5745704992069267, + "learning_rate": 5.826911971613198e-06, + "loss": 0.1728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1651586890220642, + "step": 3400, + "valid_targets_mean": 4589.2, + "valid_targets_min": 483 + }, + { + "epoch": 5.439297124600639, + "grad_norm": 0.4989786194389621, + "learning_rate": 5.770809297853495e-06, + "loss": 0.1512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14755919575691223, + "step": 3405, + "valid_targets_mean": 5938.4, + "valid_targets_min": 2031 + }, + { + "epoch": 5.447284345047923, + "grad_norm": 0.49443831260201854, + "learning_rate": 5.714932445873325e-06, + "loss": 0.1616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1464129090309143, + "step": 3410, + "valid_targets_mean": 4288.4, + "valid_targets_min": 1100 + }, + { + "epoch": 5.455271565495208, + "grad_norm": 0.44461458578301255, + "learning_rate": 5.659282302456115e-06, + "loss": 0.1568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14050893485546112, + "step": 3415, + "valid_targets_mean": 4480.2, + "valid_targets_min": 1900 + }, + { + "epoch": 5.463258785942492, + "grad_norm": 0.46880506564818625, + "learning_rate": 5.603859750787302e-06, + "loss": 0.1691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16493158042430878, + "step": 3420, + "valid_targets_mean": 4905.8, + "valid_targets_min": 729 + }, + { + "epoch": 5.4712460063897765, + "grad_norm": 0.5510903509258099, + "learning_rate": 5.548665670440418e-06, + "loss": 0.1739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1883580982685089, + "step": 3425, + "valid_targets_mean": 5244.1, + "valid_targets_min": 1231 + }, + { + "epoch": 5.479233226837061, + "grad_norm": 0.5762253046887472, + "learning_rate": 5.4937009373630535e-06, + "loss": 0.1668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2006421685218811, + "step": 3430, + "valid_targets_mean": 4248.2, + "valid_targets_min": 391 + }, + { + "epoch": 5.487220447284345, + "grad_norm": 0.4097504486851553, + "learning_rate": 5.438966423862997e-06, + "loss": 0.167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15338310599327087, + "step": 3435, + "valid_targets_mean": 5802.8, + "valid_targets_min": 3432 + }, + { + "epoch": 5.49520766773163, + "grad_norm": 0.4823098748493422, + "learning_rate": 5.384462998594384e-06, + "loss": 0.1726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19534525275230408, + "step": 3440, + "valid_targets_mean": 4855.6, + "valid_targets_min": 1535 + }, + { + "epoch": 5.503194888178914, + "grad_norm": 0.5244003410758836, + "learning_rate": 5.330191526543884e-06, + "loss": 0.1617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15437617897987366, + "step": 3445, + "valid_targets_mean": 4552.6, + "valid_targets_min": 1260 + }, + { + "epoch": 5.511182108626198, + "grad_norm": 0.7176789774329526, + "learning_rate": 5.276152869017028e-06, + "loss": 0.1751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17744609713554382, + "step": 3450, + "valid_targets_mean": 3896.1, + "valid_targets_min": 747 + }, + { + "epoch": 5.519169329073483, + "grad_norm": 0.5937799295103032, + "learning_rate": 5.22234788362447e-06, + "loss": 0.1523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1442209631204605, + "step": 3455, + "valid_targets_mean": 4732.0, + "valid_targets_min": 2590 + }, + { + "epoch": 5.527156549520766, + "grad_norm": 0.45301538134789104, + "learning_rate": 5.168777424268454e-06, + "loss": 0.1707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15300558507442474, + "step": 3460, + "valid_targets_mean": 5012.1, + "valid_targets_min": 1063 + }, + { + "epoch": 5.535143769968051, + "grad_norm": 0.5152594689501937, + "learning_rate": 5.115442341129171e-06, + "loss": 0.1646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18040233850479126, + "step": 3465, + "valid_targets_mean": 4254.9, + "valid_targets_min": 2080 + }, + { + "epoch": 5.543130990415335, + "grad_norm": 0.49154076018681375, + "learning_rate": 5.062343480651364e-06, + "loss": 0.1674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.161627858877182, + "step": 3470, + "valid_targets_mean": 4199.4, + "valid_targets_min": 641 + }, + { + "epoch": 5.55111821086262, + "grad_norm": 0.48941128766239755, + "learning_rate": 5.009481685530817e-06, + "loss": 0.148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15404511988162994, + "step": 3475, + "valid_targets_mean": 4560.2, + "valid_targets_min": 1653 + }, + { + "epoch": 5.559105431309904, + "grad_norm": 0.525948500348585, + "learning_rate": 4.956857794701026e-06, + "loss": 0.1695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1696540117263794, + "step": 3480, + "valid_targets_mean": 3846.1, + "valid_targets_min": 625 + }, + { + "epoch": 5.567092651757188, + "grad_norm": 0.49936612277457104, + "learning_rate": 4.904472643319873e-06, + "loss": 0.169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15820062160491943, + "step": 3485, + "valid_targets_mean": 4445.6, + "valid_targets_min": 1523 + }, + { + "epoch": 5.575079872204473, + "grad_norm": 0.4729205139876077, + "learning_rate": 4.852327062756352e-06, + "loss": 0.1664, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16825735569000244, + "step": 3490, + "valid_targets_mean": 4923.2, + "valid_targets_min": 1543 + }, + { + "epoch": 5.583067092651757, + "grad_norm": 0.4560925106569269, + "learning_rate": 4.800421880577411e-06, + "loss": 0.1719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16373009979724884, + "step": 3495, + "valid_targets_mean": 5574.2, + "valid_targets_min": 1357 + }, + { + "epoch": 5.5910543130990416, + "grad_norm": 0.466756243293937, + "learning_rate": 4.748757920534779e-06, + "loss": 0.1776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19758476316928864, + "step": 3500, + "valid_targets_mean": 5720.0, + "valid_targets_min": 1298 + }, + { + "epoch": 5.599041533546326, + "grad_norm": 0.5396498066660705, + "learning_rate": 4.697336002551947e-06, + "loss": 0.1684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19277673959732056, + "step": 3505, + "valid_targets_mean": 6450.8, + "valid_targets_min": 2080 + }, + { + "epoch": 5.60702875399361, + "grad_norm": 0.4782218522459289, + "learning_rate": 4.6461569427110684e-06, + "loss": 0.1601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16561949253082275, + "step": 3510, + "valid_targets_mean": 5124.3, + "valid_targets_min": 1392 + }, + { + "epoch": 5.615015974440895, + "grad_norm": 0.5304339963977932, + "learning_rate": 4.5952215532401146e-06, + "loss": 0.1653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16678208112716675, + "step": 3515, + "valid_targets_mean": 4472.6, + "valid_targets_min": 1156 + }, + { + "epoch": 5.623003194888179, + "grad_norm": 0.47151339401839326, + "learning_rate": 4.544530642499894e-06, + "loss": 0.1683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16763369739055634, + "step": 3520, + "valid_targets_mean": 5364.7, + "valid_targets_min": 614 + }, + { + "epoch": 5.6309904153354635, + "grad_norm": 0.5501456164083334, + "learning_rate": 4.4940850149712765e-06, + "loss": 0.161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1649385541677475, + "step": 3525, + "valid_targets_mean": 4224.4, + "valid_targets_min": 1217 + }, + { + "epoch": 5.638977635782748, + "grad_norm": 0.5241537472573441, + "learning_rate": 4.443885471242418e-06, + "loss": 0.1884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24142885208129883, + "step": 3530, + "valid_targets_mean": 4711.8, + "valid_targets_min": 852 + }, + { + "epoch": 5.646964856230032, + "grad_norm": 0.5267524806152389, + "learning_rate": 4.393932807996017e-06, + "loss": 0.1605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16665439307689667, + "step": 3535, + "valid_targets_mean": 5355.3, + "valid_targets_min": 2521 + }, + { + "epoch": 5.654952076677317, + "grad_norm": 0.5167579479066717, + "learning_rate": 4.344227817996735e-06, + "loss": 0.1614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22691744565963745, + "step": 3540, + "valid_targets_mean": 5392.4, + "valid_targets_min": 1781 + }, + { + "epoch": 5.6629392971246, + "grad_norm": 0.5392095499369923, + "learning_rate": 4.294771290078548e-06, + "loss": 0.1574, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1580587923526764, + "step": 3545, + "valid_targets_mean": 4370.6, + "valid_targets_min": 1450 + }, + { + "epoch": 5.6709265175718855, + "grad_norm": 0.4500098175323538, + "learning_rate": 4.245564009132293e-06, + "loss": 0.1577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13657471537590027, + "step": 3550, + "valid_targets_mean": 5291.3, + "valid_targets_min": 974 + }, + { + "epoch": 5.678913738019169, + "grad_norm": 0.4882435750022714, + "learning_rate": 4.196606756093138e-06, + "loss": 0.1635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14333054423332214, + "step": 3555, + "valid_targets_mean": 5113.7, + "valid_targets_min": 1822 + }, + { + "epoch": 5.686900958466453, + "grad_norm": 0.539438359838628, + "learning_rate": 4.147900307928268e-06, + "loss": 0.1567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15099304914474487, + "step": 3560, + "valid_targets_mean": 4687.0, + "valid_targets_min": 1194 + }, + { + "epoch": 5.694888178913738, + "grad_norm": 0.5099354746112605, + "learning_rate": 4.099445437624487e-06, + "loss": 0.1629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17994922399520874, + "step": 3565, + "valid_targets_mean": 4840.9, + "valid_targets_min": 508 + }, + { + "epoch": 5.702875399361022, + "grad_norm": 0.4910940595601601, + "learning_rate": 4.051242914175995e-06, + "loss": 0.1649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16600370407104492, + "step": 3570, + "valid_targets_mean": 5274.4, + "valid_targets_min": 568 + }, + { + "epoch": 5.710862619808307, + "grad_norm": 0.4937393027275975, + "learning_rate": 4.003293502572163e-06, + "loss": 0.1638, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18773019313812256, + "step": 3575, + "valid_targets_mean": 4499.5, + "valid_targets_min": 420 + }, + { + "epoch": 5.718849840255591, + "grad_norm": 0.44621833560642143, + "learning_rate": 3.955597963785391e-06, + "loss": 0.148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15206709504127502, + "step": 3580, + "valid_targets_mean": 5360.9, + "valid_targets_min": 773 + }, + { + "epoch": 5.726837060702875, + "grad_norm": 0.46879550301828304, + "learning_rate": 3.908157054759048e-06, + "loss": 0.169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15934661030769348, + "step": 3585, + "valid_targets_mean": 5322.0, + "valid_targets_min": 909 + }, + { + "epoch": 5.73482428115016, + "grad_norm": 0.4680808523084378, + "learning_rate": 3.860971528395427e-06, + "loss": 0.1832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17729443311691284, + "step": 3590, + "valid_targets_mean": 5461.9, + "valid_targets_min": 2250 + }, + { + "epoch": 5.742811501597444, + "grad_norm": 0.5267451694944348, + "learning_rate": 3.814042133543847e-06, + "loss": 0.1626, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16615736484527588, + "step": 3595, + "valid_targets_mean": 3859.7, + "valid_targets_min": 646 + }, + { + "epoch": 5.7507987220447285, + "grad_norm": 0.514060632798016, + "learning_rate": 3.7673696149887117e-06, + "loss": 0.1663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16031639277935028, + "step": 3600, + "valid_targets_mean": 5673.9, + "valid_targets_min": 3182 + }, + { + "epoch": 5.758785942492013, + "grad_norm": 0.49974165896330014, + "learning_rate": 3.72095471343773e-06, + "loss": 0.1657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18452566862106323, + "step": 3605, + "valid_targets_mean": 4920.8, + "valid_targets_min": 1172 + }, + { + "epoch": 5.766773162939297, + "grad_norm": 0.6833706381129956, + "learning_rate": 3.674798165510136e-06, + "loss": 0.156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17326435446739197, + "step": 3610, + "valid_targets_mean": 3820.6, + "valid_targets_min": 336 + }, + { + "epoch": 5.774760383386582, + "grad_norm": 0.48946150619688195, + "learning_rate": 3.6289007037250244e-06, + "loss": 0.1663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1674729883670807, + "step": 3615, + "valid_targets_mean": 4756.6, + "valid_targets_min": 546 + }, + { + "epoch": 5.782747603833866, + "grad_norm": 0.6981098068418168, + "learning_rate": 3.5832630564897073e-06, + "loss": 0.1674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14979082345962524, + "step": 3620, + "valid_targets_mean": 6019.2, + "valid_targets_min": 1232 + }, + { + "epoch": 5.7907348242811505, + "grad_norm": 0.4415917414218253, + "learning_rate": 3.5378859480881443e-06, + "loss": 0.1726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1633869707584381, + "step": 3625, + "valid_targets_mean": 5033.4, + "valid_targets_min": 1642 + }, + { + "epoch": 5.798722044728435, + "grad_norm": 0.5082100658653667, + "learning_rate": 3.492770098669478e-06, + "loss": 0.1623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17087781429290771, + "step": 3630, + "valid_targets_mean": 4868.6, + "valid_targets_min": 2039 + }, + { + "epoch": 5.806709265175719, + "grad_norm": 0.4705356790307018, + "learning_rate": 3.4479162242365717e-06, + "loss": 0.1571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1566528081893921, + "step": 3635, + "valid_targets_mean": 5087.8, + "valid_targets_min": 428 + }, + { + "epoch": 5.814696485623003, + "grad_norm": 0.5289220901032725, + "learning_rate": 3.403325036634679e-06, + "loss": 0.1689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16809199750423431, + "step": 3640, + "valid_targets_mean": 4230.8, + "valid_targets_min": 840 + }, + { + "epoch": 5.822683706070287, + "grad_norm": 0.4958668832343072, + "learning_rate": 3.3589972435401184e-06, + "loss": 0.1516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16835638880729675, + "step": 3645, + "valid_targets_mean": 4882.5, + "valid_targets_min": 2241 + }, + { + "epoch": 5.830670926517572, + "grad_norm": 0.48811905886136925, + "learning_rate": 3.3149335484490553e-06, + "loss": 0.174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16630032658576965, + "step": 3650, + "valid_targets_mean": 5152.8, + "valid_targets_min": 562 + }, + { + "epoch": 5.838658146964856, + "grad_norm": 0.512069651873867, + "learning_rate": 3.2711346506663346e-06, + "loss": 0.1692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15492770075798035, + "step": 3655, + "valid_targets_mean": 3918.2, + "valid_targets_min": 1267 + }, + { + "epoch": 5.84664536741214, + "grad_norm": 0.4683577080630245, + "learning_rate": 3.2276012452943893e-06, + "loss": 0.1515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13782991468906403, + "step": 3660, + "valid_targets_mean": 5204.9, + "valid_targets_min": 1990 + }, + { + "epoch": 5.854632587859425, + "grad_norm": 0.4545248711358778, + "learning_rate": 3.1843340232222064e-06, + "loss": 0.1676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14945927262306213, + "step": 3665, + "valid_targets_mean": 4706.4, + "valid_targets_min": 1169 + }, + { + "epoch": 5.862619808306709, + "grad_norm": 0.45995293124692316, + "learning_rate": 3.1413336711143437e-06, + "loss": 0.1617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14870786666870117, + "step": 3670, + "valid_targets_mean": 4540.5, + "valid_targets_min": 1963 + }, + { + "epoch": 5.8706070287539935, + "grad_norm": 0.47027428301454915, + "learning_rate": 3.0986008714000703e-06, + "loss": 0.1646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19357925653457642, + "step": 3675, + "valid_targets_mean": 5395.3, + "valid_targets_min": 661 + }, + { + "epoch": 5.878594249201278, + "grad_norm": 0.47417060520818427, + "learning_rate": 3.056136302262489e-06, + "loss": 0.1584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17412704229354858, + "step": 3680, + "valid_targets_mean": 5844.1, + "valid_targets_min": 2300 + }, + { + "epoch": 5.886581469648562, + "grad_norm": 0.4974733331740804, + "learning_rate": 3.0139406376278212e-06, + "loss": 0.1649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16733218729496002, + "step": 3685, + "valid_targets_mean": 4846.6, + "valid_targets_min": 936 + }, + { + "epoch": 5.894568690095847, + "grad_norm": 0.5231984609118597, + "learning_rate": 2.972014547154671e-06, + "loss": 0.1609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1489640772342682, + "step": 3690, + "valid_targets_mean": 4862.9, + "valid_targets_min": 651 + }, + { + "epoch": 5.902555910543131, + "grad_norm": 0.4859221837793371, + "learning_rate": 2.930358696223423e-06, + "loss": 0.1682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1759262979030609, + "step": 3695, + "valid_targets_mean": 5368.2, + "valid_targets_min": 899 + }, + { + "epoch": 5.9105431309904155, + "grad_norm": 0.47077473092171224, + "learning_rate": 2.8889737459256695e-06, + "loss": 0.1664, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16548126935958862, + "step": 3700, + "valid_targets_mean": 5086.6, + "valid_targets_min": 784 + }, + { + "epoch": 5.9185303514377, + "grad_norm": 0.49432424854437196, + "learning_rate": 2.8478603530537285e-06, + "loss": 0.1659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15692386031150818, + "step": 3705, + "valid_targets_mean": 4616.9, + "valid_targets_min": 1698 + }, + { + "epoch": 5.926517571884984, + "grad_norm": 0.49861584831517203, + "learning_rate": 2.8070191700902194e-06, + "loss": 0.1692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14717496931552887, + "step": 3710, + "valid_targets_mean": 4043.9, + "valid_targets_min": 358 + }, + { + "epoch": 5.934504792332269, + "grad_norm": 0.5227096968437029, + "learning_rate": 2.7664508451976903e-06, + "loss": 0.1601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14311034977436066, + "step": 3715, + "valid_targets_mean": 5310.3, + "valid_targets_min": 822 + }, + { + "epoch": 5.942492012779553, + "grad_norm": 0.5401942371530758, + "learning_rate": 2.726156022208362e-06, + "loss": 0.1681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19872379302978516, + "step": 3720, + "valid_targets_mean": 4996.3, + "valid_targets_min": 1360 + }, + { + "epoch": 5.950479233226837, + "grad_norm": 0.5036722120094557, + "learning_rate": 2.6861353406138713e-06, + "loss": 0.1653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18039855360984802, + "step": 3725, + "valid_targets_mean": 5059.4, + "valid_targets_min": 835 + }, + { + "epoch": 5.958466453674122, + "grad_norm": 0.4768709414023673, + "learning_rate": 2.646389435555172e-06, + "loss": 0.1705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16467642784118652, + "step": 3730, + "valid_targets_mean": 4775.9, + "valid_targets_min": 1113 + }, + { + "epoch": 5.966453674121405, + "grad_norm": 0.4505152766422281, + "learning_rate": 2.6069189378124015e-06, + "loss": 0.1686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1375581920146942, + "step": 3735, + "valid_targets_mean": 5153.4, + "valid_targets_min": 1586 + }, + { + "epoch": 5.97444089456869, + "grad_norm": 0.6295109583708369, + "learning_rate": 2.567724473794908e-06, + "loss": 0.177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18594983220100403, + "step": 3740, + "valid_targets_mean": 3786.9, + "valid_targets_min": 683 + }, + { + "epoch": 5.982428115015974, + "grad_norm": 0.4200857765813741, + "learning_rate": 2.5288066655312914e-06, + "loss": 0.1649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.163049578666687, + "step": 3745, + "valid_targets_mean": 6365.8, + "valid_targets_min": 2095 + }, + { + "epoch": 5.9904153354632586, + "grad_norm": 0.6254325903030274, + "learning_rate": 2.4901661306595414e-06, + "loss": 0.1744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17417556047439575, + "step": 3750, + "valid_targets_mean": 4794.1, + "valid_targets_min": 501 + }, + { + "epoch": 5.998402555910543, + "grad_norm": 0.5264492916089583, + "learning_rate": 2.451803482417234e-06, + "loss": 0.1722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14926278591156006, + "step": 3755, + "valid_targets_mean": 4241.8, + "valid_targets_min": 1562 + }, + { + "epoch": 6.006389776357827, + "grad_norm": 0.49203753066718664, + "learning_rate": 2.413719329631785e-06, + "loss": 0.1527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15311649441719055, + "step": 3760, + "valid_targets_mean": 4905.8, + "valid_targets_min": 1987 + }, + { + "epoch": 6.014376996805112, + "grad_norm": 0.5951326896601777, + "learning_rate": 2.375914276710811e-06, + "loss": 0.1635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16578024625778198, + "step": 3765, + "valid_targets_mean": 4779.8, + "valid_targets_min": 1100 + }, + { + "epoch": 6.022364217252396, + "grad_norm": 0.48183368867035825, + "learning_rate": 2.338388923632513e-06, + "loss": 0.1723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16086134314537048, + "step": 3770, + "valid_targets_mean": 4555.1, + "valid_targets_min": 680 + }, + { + "epoch": 6.0303514376996805, + "grad_norm": 0.4991514794168638, + "learning_rate": 2.3011438659361794e-06, + "loss": 0.1461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15352702140808105, + "step": 3775, + "valid_targets_mean": 5390.8, + "valid_targets_min": 2574 + }, + { + "epoch": 6.038338658146965, + "grad_norm": 0.5004302788098899, + "learning_rate": 2.2641796947127114e-06, + "loss": 0.1715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16071294248104095, + "step": 3780, + "valid_targets_mean": 5227.7, + "valid_targets_min": 1983 + }, + { + "epoch": 6.046325878594249, + "grad_norm": 0.47919983329397353, + "learning_rate": 2.2274969965952553e-06, + "loss": 0.1402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14981648325920105, + "step": 3785, + "valid_targets_mean": 4976.9, + "valid_targets_min": 2021 + }, + { + "epoch": 6.054313099041534, + "grad_norm": 0.47542744449520763, + "learning_rate": 2.1910963537498887e-06, + "loss": 0.1541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14725175499916077, + "step": 3790, + "valid_targets_mean": 4814.4, + "valid_targets_min": 690 + }, + { + "epoch": 6.062300319488818, + "grad_norm": 0.4842460038834621, + "learning_rate": 2.1549783438663872e-06, + "loss": 0.1615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1420721560716629, + "step": 3795, + "valid_targets_mean": 5436.4, + "valid_targets_min": 1793 + }, + { + "epoch": 6.0702875399361025, + "grad_norm": 0.47140984563889077, + "learning_rate": 2.1191435401490534e-06, + "loss": 0.1655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1739579290151596, + "step": 3800, + "valid_targets_mean": 5437.5, + "valid_targets_min": 919 + }, + { + "epoch": 6.078274760383387, + "grad_norm": 0.7446969509152094, + "learning_rate": 2.0835925113076062e-06, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16405190527439117, + "step": 3805, + "valid_targets_mean": 5858.0, + "valid_targets_min": 1543 + }, + { + "epoch": 6.086261980830671, + "grad_norm": 0.4456085804468523, + "learning_rate": 2.0483258215481784e-06, + "loss": 0.1574, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14001686871051788, + "step": 3810, + "valid_targets_mean": 5576.1, + "valid_targets_min": 998 + }, + { + "epoch": 6.094249201277956, + "grad_norm": 0.4581135228921414, + "learning_rate": 2.0133440305643413e-06, + "loss": 0.1482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1370004415512085, + "step": 3815, + "valid_targets_mean": 4656.1, + "valid_targets_min": 1325 + }, + { + "epoch": 6.102236421725239, + "grad_norm": 0.4478316277948835, + "learning_rate": 1.9786476935282463e-06, + "loss": 0.1559, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14399506151676178, + "step": 3820, + "valid_targets_mean": 5733.0, + "valid_targets_min": 1816 + }, + { + "epoch": 6.110223642172524, + "grad_norm": 0.445274051117867, + "learning_rate": 1.944237361081782e-06, + "loss": 0.1586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14635354280471802, + "step": 3825, + "valid_targets_mean": 5607.6, + "valid_targets_min": 428 + }, + { + "epoch": 6.118210862619808, + "grad_norm": 0.6415233850020196, + "learning_rate": 1.9101135793278746e-06, + "loss": 0.1646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1825276017189026, + "step": 3830, + "valid_targets_mean": 4352.9, + "valid_targets_min": 329 + }, + { + "epoch": 6.126198083067092, + "grad_norm": 0.5276389299287585, + "learning_rate": 1.8762768898217732e-06, + "loss": 0.1634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1704082190990448, + "step": 3835, + "valid_targets_mean": 4400.9, + "valid_targets_min": 942 + }, + { + "epoch": 6.134185303514377, + "grad_norm": 0.55009561912505, + "learning_rate": 1.8427278295625006e-06, + "loss": 0.1743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19751335680484772, + "step": 3840, + "valid_targets_mean": 4488.4, + "valid_targets_min": 896 + }, + { + "epoch": 6.142172523961661, + "grad_norm": 0.48859746009960264, + "learning_rate": 1.8094669309843161e-06, + "loss": 0.1552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12780989706516266, + "step": 3845, + "valid_targets_mean": 3926.5, + "valid_targets_min": 1012 + }, + { + "epoch": 6.1501597444089455, + "grad_norm": 0.5607367725075801, + "learning_rate": 1.776494721948241e-06, + "loss": 0.1548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14702245593070984, + "step": 3850, + "valid_targets_mean": 4336.1, + "valid_targets_min": 1453 + }, + { + "epoch": 6.15814696485623, + "grad_norm": 0.43671641575643083, + "learning_rate": 1.7438117257337239e-06, + "loss": 0.1485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16328737139701843, + "step": 3855, + "valid_targets_mean": 5713.7, + "valid_targets_min": 1510 + }, + { + "epoch": 6.166134185303514, + "grad_norm": 0.4725994169019507, + "learning_rate": 1.7114184610302964e-06, + "loss": 0.1561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16375653445720673, + "step": 3860, + "valid_targets_mean": 5364.8, + "valid_targets_min": 1867 + }, + { + "epoch": 6.174121405750799, + "grad_norm": 0.43920140069675684, + "learning_rate": 1.67931544192937e-06, + "loss": 0.1551, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1650637686252594, + "step": 3865, + "valid_targets_mean": 5834.2, + "valid_targets_min": 2987 + }, + { + "epoch": 6.182108626198083, + "grad_norm": 0.6998947577393563, + "learning_rate": 1.6475031779160611e-06, + "loss": 0.1602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16513022780418396, + "step": 3870, + "valid_targets_mean": 5041.4, + "valid_targets_min": 862 + }, + { + "epoch": 6.1900958466453675, + "grad_norm": 0.5083668808608908, + "learning_rate": 1.6159821738611192e-06, + "loss": 0.1778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1575690358877182, + "step": 3875, + "valid_targets_mean": 4826.8, + "valid_targets_min": 519 + }, + { + "epoch": 6.198083067092652, + "grad_norm": 0.4593445751109819, + "learning_rate": 1.5847529300128827e-06, + "loss": 0.1686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16691654920578003, + "step": 3880, + "valid_targets_mean": 5452.4, + "valid_targets_min": 1090 + }, + { + "epoch": 6.206070287539936, + "grad_norm": 0.5422017808243945, + "learning_rate": 1.5538159419893895e-06, + "loss": 0.1559, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15538769960403442, + "step": 3885, + "valid_targets_mean": 4313.0, + "valid_targets_min": 1022 + }, + { + "epoch": 6.214057507987221, + "grad_norm": 0.5347913106039871, + "learning_rate": 1.5231717007704738e-06, + "loss": 0.1475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1320032775402069, + "step": 3890, + "valid_targets_mean": 4379.3, + "valid_targets_min": 2080 + }, + { + "epoch": 6.222044728434505, + "grad_norm": 0.4474745812734044, + "learning_rate": 1.4928206926899801e-06, + "loss": 0.1687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14763572812080383, + "step": 3895, + "valid_targets_mean": 5836.9, + "valid_targets_min": 2325 + }, + { + "epoch": 6.2300319488817895, + "grad_norm": 0.47433310434755555, + "learning_rate": 1.4627633994280599e-06, + "loss": 0.1547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1461184024810791, + "step": 3900, + "valid_targets_mean": 5509.9, + "valid_targets_min": 865 + }, + { + "epoch": 6.238019169329074, + "grad_norm": 0.4529194089949568, + "learning_rate": 1.433000298003504e-06, + "loss": 0.1537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1502389758825302, + "step": 3905, + "valid_targets_mean": 6055.1, + "valid_targets_min": 2880 + }, + { + "epoch": 6.246006389776358, + "grad_norm": 0.5221343423400612, + "learning_rate": 1.4035318607662029e-06, + "loss": 0.1716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18835774064064026, + "step": 3910, + "valid_targets_mean": 5647.4, + "valid_targets_min": 557 + }, + { + "epoch": 6.253993610223642, + "grad_norm": 0.4696728514223722, + "learning_rate": 1.3743585553896144e-06, + "loss": 0.1643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16331344842910767, + "step": 3915, + "valid_targets_mean": 5139.0, + "valid_targets_min": 802 + }, + { + "epoch": 6.261980830670926, + "grad_norm": 0.5698993865428698, + "learning_rate": 1.345480844863376e-06, + "loss": 0.1615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1860525906085968, + "step": 3920, + "valid_targets_mean": 4267.4, + "valid_targets_min": 1880 + }, + { + "epoch": 6.2699680511182105, + "grad_norm": 0.5391313982087154, + "learning_rate": 1.316899187485925e-06, + "loss": 0.1622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1673540621995926, + "step": 3925, + "valid_targets_mean": 5031.1, + "valid_targets_min": 1518 + }, + { + "epoch": 6.277955271565495, + "grad_norm": 0.5119925703283522, + "learning_rate": 1.288614036857252e-06, + "loss": 0.1676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15239587426185608, + "step": 3930, + "valid_targets_mean": 4621.6, + "valid_targets_min": 2205 + }, + { + "epoch": 6.285942492012779, + "grad_norm": 0.5002344474981675, + "learning_rate": 1.260625841871692e-06, + "loss": 0.1571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17312490940093994, + "step": 3935, + "valid_targets_mean": 4581.0, + "valid_targets_min": 952 + }, + { + "epoch": 6.293929712460064, + "grad_norm": 0.5471905188175354, + "learning_rate": 1.2329350467107925e-06, + "loss": 0.1853, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1913260966539383, + "step": 3940, + "valid_targets_mean": 4414.9, + "valid_targets_min": 1948 + }, + { + "epoch": 6.301916932907348, + "grad_norm": 0.5928095486490488, + "learning_rate": 1.2055420908362781e-06, + "loss": 0.1697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15695932507514954, + "step": 3945, + "valid_targets_mean": 5036.8, + "valid_targets_min": 587 + }, + { + "epoch": 6.3099041533546325, + "grad_norm": 0.7806745270379135, + "learning_rate": 1.1784474089830612e-06, + "loss": 0.1549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.141230046749115, + "step": 3950, + "valid_targets_mean": 3909.8, + "valid_targets_min": 803 + }, + { + "epoch": 6.317891373801917, + "grad_norm": 0.5241502993360989, + "learning_rate": 1.1516514311523607e-06, + "loss": 0.1572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1841631531715393, + "step": 3955, + "valid_targets_mean": 4971.7, + "valid_targets_min": 1007 + }, + { + "epoch": 6.325878594249201, + "grad_norm": 0.44293207636192655, + "learning_rate": 1.1251545826048593e-06, + "loss": 0.1627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17101489007472992, + "step": 3960, + "valid_targets_mean": 5828.6, + "valid_targets_min": 2349 + }, + { + "epoch": 6.333865814696486, + "grad_norm": 0.5147967978992214, + "learning_rate": 1.098957283853972e-06, + "loss": 0.1649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15055087208747864, + "step": 3965, + "valid_targets_mean": 4281.2, + "valid_targets_min": 813 + }, + { + "epoch": 6.34185303514377, + "grad_norm": 0.5053144291349435, + "learning_rate": 1.0730599506591476e-06, + "loss": 0.1705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15987616777420044, + "step": 3970, + "valid_targets_mean": 4819.4, + "valid_targets_min": 656 + }, + { + "epoch": 6.3498402555910545, + "grad_norm": 0.49378772224517475, + "learning_rate": 1.0474629940192994e-06, + "loss": 0.1517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17134281992912292, + "step": 3975, + "valid_targets_mean": 5614.7, + "valid_targets_min": 936 + }, + { + "epoch": 6.357827476038339, + "grad_norm": 0.4942524157645037, + "learning_rate": 1.0221668201662726e-06, + "loss": 0.1573, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1388753205537796, + "step": 3980, + "valid_targets_mean": 4453.8, + "valid_targets_min": 958 + }, + { + "epoch": 6.365814696485623, + "grad_norm": 0.47105231810783227, + "learning_rate": 9.971718305583767e-07, + "loss": 0.171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1350238174200058, + "step": 3985, + "valid_targets_mean": 4756.4, + "valid_targets_min": 651 + }, + { + "epoch": 6.373801916932908, + "grad_norm": 0.4797868903591038, + "learning_rate": 9.724784218740524e-07, + "loss": 0.163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18055656552314758, + "step": 3990, + "valid_targets_mean": 5385.4, + "valid_targets_min": 2538 + }, + { + "epoch": 6.381789137380192, + "grad_norm": 0.546317463292646, + "learning_rate": 9.480869860055364e-07, + "loss": 0.1485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12972435355186462, + "step": 3995, + "valid_targets_mean": 5110.4, + "valid_targets_min": 828 + }, + { + "epoch": 6.389776357827476, + "grad_norm": 0.501652799871782, + "learning_rate": 9.239979100526763e-07, + "loss": 0.1544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16283018887043, + "step": 4000, + "valid_targets_mean": 5847.4, + "valid_targets_min": 1265 + }, + { + "epoch": 6.397763578274761, + "grad_norm": 0.5123995362461974, + "learning_rate": 9.002115763167585e-07, + "loss": 0.1526, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16418376564979553, + "step": 4005, + "valid_targets_mean": 4781.9, + "valid_targets_min": 483 + }, + { + "epoch": 6.405750798722044, + "grad_norm": 0.5655064938108871, + "learning_rate": 8.7672836229447e-07, + "loss": 0.1577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16947625577449799, + "step": 4010, + "valid_targets_mean": 4005.4, + "valid_targets_min": 791 + }, + { + "epoch": 6.413738019169329, + "grad_norm": 0.6651149588120067, + "learning_rate": 8.535486406718684e-07, + "loss": 0.1583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16407115757465363, + "step": 4015, + "valid_targets_mean": 5528.8, + "valid_targets_min": 1297 + }, + { + "epoch": 6.421725239616613, + "grad_norm": 0.5062861693449161, + "learning_rate": 8.306727793185132e-07, + "loss": 0.1625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15367011725902557, + "step": 4020, + "valid_targets_mean": 4609.2, + "valid_targets_min": 1156 + }, + { + "epoch": 6.4297124600638975, + "grad_norm": 0.5252283287798611, + "learning_rate": 8.081011412815965e-07, + "loss": 0.1662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18086940050125122, + "step": 4025, + "valid_targets_mean": 4623.1, + "valid_targets_min": 797 + }, + { + "epoch": 6.437699680511182, + "grad_norm": 0.5181451559409599, + "learning_rate": 7.858340847801815e-07, + "loss": 0.1587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.160737544298172, + "step": 4030, + "valid_targets_mean": 4243.2, + "valid_targets_min": 1583 + }, + { + "epoch": 6.445686900958466, + "grad_norm": 0.5212666018139555, + "learning_rate": 7.638719631995406e-07, + "loss": 0.1579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14286597073078156, + "step": 4035, + "valid_targets_mean": 4253.1, + "valid_targets_min": 2188 + }, + { + "epoch": 6.453674121405751, + "grad_norm": 0.5004735693367742, + "learning_rate": 7.422151250855214e-07, + "loss": 0.148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15612217783927917, + "step": 4040, + "valid_targets_mean": 4539.6, + "valid_targets_min": 1364 + }, + { + "epoch": 6.461661341853035, + "grad_norm": 0.5228627437968858, + "learning_rate": 7.208639141390295e-07, + "loss": 0.1553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16546684503555298, + "step": 4045, + "valid_targets_mean": 4428.9, + "valid_targets_min": 819 + }, + { + "epoch": 6.4696485623003195, + "grad_norm": 0.44743730236732104, + "learning_rate": 6.998186692105657e-07, + "loss": 0.1544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15039759874343872, + "step": 4050, + "valid_targets_mean": 5451.6, + "valid_targets_min": 1905 + }, + { + "epoch": 6.477635782747604, + "grad_norm": 0.4502056621596025, + "learning_rate": 6.790797242948644e-07, + "loss": 0.1606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15037524700164795, + "step": 4055, + "valid_targets_mean": 5427.0, + "valid_targets_min": 1897 + }, + { + "epoch": 6.485623003194888, + "grad_norm": 0.4293053831010077, + "learning_rate": 6.5864740852557e-07, + "loss": 0.155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16043220460414886, + "step": 4060, + "valid_targets_mean": 5719.5, + "valid_targets_min": 757 + }, + { + "epoch": 6.493610223642173, + "grad_norm": 0.47108992924603316, + "learning_rate": 6.385220461700248e-07, + "loss": 0.1647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16741064190864563, + "step": 4065, + "valid_targets_mean": 5453.7, + "valid_targets_min": 1758 + }, + { + "epoch": 6.501597444089457, + "grad_norm": 0.47782793699178533, + "learning_rate": 6.187039566241337e-07, + "loss": 0.159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1436358541250229, + "step": 4070, + "valid_targets_mean": 5147.2, + "valid_targets_min": 423 + }, + { + "epoch": 6.5095846645367414, + "grad_norm": 0.5097003346397011, + "learning_rate": 5.99193454407272e-07, + "loss": 0.1498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.145539790391922, + "step": 4075, + "valid_targets_mean": 4628.7, + "valid_targets_min": 1347 + }, + { + "epoch": 6.517571884984026, + "grad_norm": 0.4885563890134189, + "learning_rate": 5.799908491573148e-07, + "loss": 0.1713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16930031776428223, + "step": 4080, + "valid_targets_mean": 4541.6, + "valid_targets_min": 1679 + }, + { + "epoch": 6.52555910543131, + "grad_norm": 0.5156008309314725, + "learning_rate": 5.610964456257107e-07, + "loss": 0.1545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18574655055999756, + "step": 4085, + "valid_targets_mean": 4505.2, + "valid_targets_min": 1407 + }, + { + "epoch": 6.533546325878595, + "grad_norm": 0.6193155974120058, + "learning_rate": 5.425105436726496e-07, + "loss": 0.1556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1539543867111206, + "step": 4090, + "valid_targets_mean": 4921.3, + "valid_targets_min": 818 + }, + { + "epoch": 6.541533546325878, + "grad_norm": 0.48499335224731416, + "learning_rate": 5.242334382623004e-07, + "loss": 0.1637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13866549730300903, + "step": 4095, + "valid_targets_mean": 5624.8, + "valid_targets_min": 1141 + }, + { + "epoch": 6.549520766773163, + "grad_norm": 0.49424624741619433, + "learning_rate": 5.062654194581429e-07, + "loss": 0.1553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1799236238002777, + "step": 4100, + "valid_targets_mean": 4593.4, + "valid_targets_min": 1548 + }, + { + "epoch": 6.557507987220447, + "grad_norm": 0.44905955898824995, + "learning_rate": 4.88606772418343e-07, + "loss": 0.1509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15588271617889404, + "step": 4105, + "valid_targets_mean": 5042.8, + "valid_targets_min": 1170 + }, + { + "epoch": 6.565495207667731, + "grad_norm": 0.520702776011052, + "learning_rate": 4.7125777739123857e-07, + "loss": 0.159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20354077219963074, + "step": 4110, + "valid_targets_mean": 5167.0, + "valid_targets_min": 1373 + }, + { + "epoch": 6.573482428115016, + "grad_norm": 0.4435861783742549, + "learning_rate": 4.54218709710903e-07, + "loss": 0.14, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14058135449886322, + "step": 4115, + "valid_targets_mean": 5767.5, + "valid_targets_min": 2807 + }, + { + "epoch": 6.5814696485623, + "grad_norm": 0.552138536766462, + "learning_rate": 4.374898397927507e-07, + "loss": 0.1635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17940273880958557, + "step": 4120, + "valid_targets_mean": 4240.6, + "valid_targets_min": 657 + }, + { + "epoch": 6.5894568690095845, + "grad_norm": 0.4934760675953949, + "learning_rate": 4.210714331292698e-07, + "loss": 0.1621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14860069751739502, + "step": 4125, + "valid_targets_mean": 4598.2, + "valid_targets_min": 1648 + }, + { + "epoch": 6.597444089456869, + "grad_norm": 0.47557123063031764, + "learning_rate": 4.049637502857895e-07, + "loss": 0.1598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16057202219963074, + "step": 4130, + "valid_targets_mean": 5109.9, + "valid_targets_min": 1470 + }, + { + "epoch": 6.605431309904153, + "grad_norm": 0.48824188715889366, + "learning_rate": 3.8916704689635707e-07, + "loss": 0.1522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1606670618057251, + "step": 4135, + "valid_targets_mean": 4935.2, + "valid_targets_min": 827 + }, + { + "epoch": 6.613418530351438, + "grad_norm": 0.584193072096277, + "learning_rate": 3.736815736596766e-07, + "loss": 0.1567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18314039707183838, + "step": 4140, + "valid_targets_mean": 4389.9, + "valid_targets_min": 455 + }, + { + "epoch": 6.621405750798722, + "grad_norm": 0.5239645121482677, + "learning_rate": 3.5850757633513424e-07, + "loss": 0.1515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15261167287826538, + "step": 4145, + "valid_targets_mean": 3911.5, + "valid_targets_min": 723 + }, + { + "epoch": 6.6293929712460065, + "grad_norm": 0.49798914942498246, + "learning_rate": 3.4364529573888803e-07, + "loss": 0.1538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1832718402147293, + "step": 4150, + "valid_targets_mean": 4998.7, + "valid_targets_min": 1378 + }, + { + "epoch": 6.637380191693291, + "grad_norm": 0.5370351684175654, + "learning_rate": 3.2909496774005344e-07, + "loss": 0.1615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14727014303207397, + "step": 4155, + "valid_targets_mean": 4993.6, + "valid_targets_min": 1859 + }, + { + "epoch": 6.645367412140575, + "grad_norm": 0.5376065934233798, + "learning_rate": 3.14856823256966e-07, + "loss": 0.1601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13972601294517517, + "step": 4160, + "valid_targets_mean": 4538.9, + "valid_targets_min": 1760 + }, + { + "epoch": 6.65335463258786, + "grad_norm": 0.4879257262202097, + "learning_rate": 3.009310882534999e-07, + "loss": 0.1656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18523406982421875, + "step": 4165, + "valid_targets_mean": 5296.8, + "valid_targets_min": 1050 + }, + { + "epoch": 6.661341853035144, + "grad_norm": 0.5804989494397974, + "learning_rate": 2.8731798373550004e-07, + "loss": 0.1621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1688893437385559, + "step": 4170, + "valid_targets_mean": 4406.4, + "valid_targets_min": 762 + }, + { + "epoch": 6.669329073482428, + "grad_norm": 0.46347021872106603, + "learning_rate": 2.740177257472576e-07, + "loss": 0.1562, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1542181670665741, + "step": 4175, + "valid_targets_mean": 5176.9, + "valid_targets_min": 640 + }, + { + "epoch": 6.677316293929713, + "grad_norm": 0.48527977721610366, + "learning_rate": 2.6103052536810226e-07, + "loss": 0.1532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16154858469963074, + "step": 4180, + "valid_targets_mean": 5032.2, + "valid_targets_min": 1075 + }, + { + "epoch": 6.685303514376997, + "grad_norm": 0.5207002345996271, + "learning_rate": 2.4835658870902226e-07, + "loss": 0.1762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18678952753543854, + "step": 4185, + "valid_targets_mean": 4774.8, + "valid_targets_min": 623 + }, + { + "epoch": 6.693290734824281, + "grad_norm": 0.4900949681899387, + "learning_rate": 2.3599611690943158e-07, + "loss": 0.1657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14106178283691406, + "step": 4190, + "valid_targets_mean": 4770.2, + "valid_targets_min": 1290 + }, + { + "epoch": 6.701277955271565, + "grad_norm": 0.5658605830474439, + "learning_rate": 2.2394930613393927e-07, + "loss": 0.1595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19314196705818176, + "step": 4195, + "valid_targets_mean": 4287.3, + "valid_targets_min": 1148 + }, + { + "epoch": 6.7092651757188495, + "grad_norm": 0.45153452050782694, + "learning_rate": 2.122163475692629e-07, + "loss": 0.1577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15588468313217163, + "step": 4200, + "valid_targets_mean": 4994.2, + "valid_targets_min": 1568 + }, + { + "epoch": 6.717252396166134, + "grad_norm": 0.4790409003168309, + "learning_rate": 2.0079742742118878e-07, + "loss": 0.1696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17813389003276825, + "step": 4205, + "valid_targets_mean": 5793.6, + "valid_targets_min": 2043 + }, + { + "epoch": 6.725239616613418, + "grad_norm": 0.48525249473687376, + "learning_rate": 1.8969272691160334e-07, + "loss": 0.1438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1606864333152771, + "step": 4210, + "valid_targets_mean": 5174.8, + "valid_targets_min": 959 + }, + { + "epoch": 6.733226837060703, + "grad_norm": 0.5323491003812912, + "learning_rate": 1.789024222756397e-07, + "loss": 0.1651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16105014085769653, + "step": 4215, + "valid_targets_mean": 3702.8, + "valid_targets_min": 1032 + }, + { + "epoch": 6.741214057507987, + "grad_norm": 0.4968258466615749, + "learning_rate": 1.6842668475885782e-07, + "loss": 0.1462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14075995981693268, + "step": 4220, + "valid_targets_mean": 4199.2, + "valid_targets_min": 1313 + }, + { + "epoch": 6.7492012779552715, + "grad_norm": 0.4925534642983092, + "learning_rate": 1.582656806145444e-07, + "loss": 0.1639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15862902998924255, + "step": 4225, + "valid_targets_mean": 4477.2, + "valid_targets_min": 1815 + }, + { + "epoch": 6.757188498402556, + "grad_norm": 0.550218936446372, + "learning_rate": 1.4841957110106388e-07, + "loss": 0.1677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18082785606384277, + "step": 4230, + "valid_targets_mean": 5861.3, + "valid_targets_min": 940 + }, + { + "epoch": 6.76517571884984, + "grad_norm": 0.4781570840237632, + "learning_rate": 1.388885124793049e-07, + "loss": 0.1613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15590888261795044, + "step": 4235, + "valid_targets_mean": 4668.6, + "valid_targets_min": 695 + }, + { + "epoch": 6.773162939297125, + "grad_norm": 0.5355126036837008, + "learning_rate": 1.2967265601019573e-07, + "loss": 0.1448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14738382399082184, + "step": 4240, + "valid_targets_mean": 3881.4, + "valid_targets_min": 337 + }, + { + "epoch": 6.781150159744409, + "grad_norm": 0.4581319359373992, + "learning_rate": 1.207721479523105e-07, + "loss": 0.1612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12903811037540436, + "step": 4245, + "valid_targets_mean": 4758.4, + "valid_targets_min": 2440 + }, + { + "epoch": 6.789137380191693, + "grad_norm": 0.5172271808327057, + "learning_rate": 1.1218712955954003e-07, + "loss": 0.1591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14677739143371582, + "step": 4250, + "valid_targets_mean": 3776.4, + "valid_targets_min": 592 + }, + { + "epoch": 6.797124600638978, + "grad_norm": 0.530851817590877, + "learning_rate": 1.0391773707885578e-07, + "loss": 0.1524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1520860195159912, + "step": 4255, + "valid_targets_mean": 3847.8, + "valid_targets_min": 521 + }, + { + "epoch": 6.805111821086262, + "grad_norm": 0.5796714007260149, + "learning_rate": 9.596410174814497e-08, + "loss": 0.1503, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.144109845161438, + "step": 4260, + "valid_targets_mean": 4218.2, + "valid_targets_min": 543 + }, + { + "epoch": 6.813099041533547, + "grad_norm": 0.5184630243827649, + "learning_rate": 8.83263497941278e-08, + "loss": 0.1807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16317498683929443, + "step": 4265, + "valid_targets_mean": 4504.2, + "valid_targets_min": 1386 + }, + { + "epoch": 6.821086261980831, + "grad_norm": 0.5295419275859305, + "learning_rate": 8.10046024303568e-08, + "loss": 0.1599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1961647868156433, + "step": 4270, + "valid_targets_mean": 5160.0, + "valid_targets_min": 2117 + }, + { + "epoch": 6.8290734824281145, + "grad_norm": 0.515399801707447, + "learning_rate": 7.399897585528726e-08, + "loss": 0.1586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17954765260219574, + "step": 4275, + "valid_targets_mean": 4397.3, + "valid_targets_min": 1475 + }, + { + "epoch": 6.8370607028754, + "grad_norm": 0.48082799148327066, + "learning_rate": 6.730958125044319e-08, + "loss": 0.1534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15425550937652588, + "step": 4280, + "valid_targets_mean": 4867.4, + "valid_targets_min": 902 + }, + { + "epoch": 6.845047923322683, + "grad_norm": 0.5125347064202592, + "learning_rate": 6.093652477864309e-08, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17881545424461365, + "step": 4285, + "valid_targets_mean": 4752.0, + "valid_targets_min": 1767 + }, + { + "epoch": 6.853035143769968, + "grad_norm": 0.44550266769959085, + "learning_rate": 5.4879907582316986e-08, + "loss": 0.1553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1376715898513794, + "step": 4290, + "valid_targets_mean": 5008.2, + "valid_targets_min": 1827 + }, + { + "epoch": 6.861022364217252, + "grad_norm": 0.49785912971046536, + "learning_rate": 4.913982578190535e-08, + "loss": 0.1676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18554158508777618, + "step": 4295, + "valid_targets_mean": 5076.2, + "valid_targets_min": 1699 + }, + { + "epoch": 6.8690095846645365, + "grad_norm": 0.434012862335176, + "learning_rate": 4.3716370474331527e-08, + "loss": 0.1533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15761414170265198, + "step": 4300, + "valid_targets_mean": 5683.8, + "valid_targets_min": 2173 + }, + { + "epoch": 6.876996805111821, + "grad_norm": 0.4600701812971065, + "learning_rate": 3.8609627731558405e-08, + "loss": 0.1622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14310933649539948, + "step": 4305, + "valid_targets_mean": 5604.4, + "valid_targets_min": 1873 + }, + { + "epoch": 6.884984025559105, + "grad_norm": 0.7704497916826207, + "learning_rate": 3.381967859920954e-08, + "loss": 0.159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16258737444877625, + "step": 4310, + "valid_targets_mean": 5497.0, + "valid_targets_min": 1359 + }, + { + "epoch": 6.89297124600639, + "grad_norm": 0.4657209311348018, + "learning_rate": 2.9346599095305685e-08, + "loss": 0.1599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17536340653896332, + "step": 4315, + "valid_targets_mean": 5737.7, + "valid_targets_min": 784 + }, + { + "epoch": 6.900958466453674, + "grad_norm": 0.4473310586041997, + "learning_rate": 2.5190460209039146e-08, + "loss": 0.1645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1598825305700302, + "step": 4320, + "valid_targets_mean": 5702.4, + "valid_targets_min": 835 + }, + { + "epoch": 6.9089456869009584, + "grad_norm": 0.6716035094415955, + "learning_rate": 2.1351327899656883e-08, + "loss": 0.1679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20283496379852295, + "step": 4325, + "valid_targets_mean": 4717.5, + "valid_targets_min": 372 + }, + { + "epoch": 6.916932907348243, + "grad_norm": 0.5446415374246868, + "learning_rate": 1.782926309540578e-08, + "loss": 0.1523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16346979141235352, + "step": 4330, + "valid_targets_mean": 4082.8, + "valid_targets_min": 1344 + }, + { + "epoch": 6.924920127795527, + "grad_norm": 0.5727743626029946, + "learning_rate": 1.462432169257344e-08, + "loss": 0.1695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15288791060447693, + "step": 4335, + "valid_targets_mean": 5043.2, + "valid_targets_min": 333 + }, + { + "epoch": 6.932907348242812, + "grad_norm": 0.4857450702797858, + "learning_rate": 1.1736554554604429e-08, + "loss": 0.156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1811220347881317, + "step": 4340, + "valid_targets_mean": 5184.7, + "valid_targets_min": 1178 + }, + { + "epoch": 6.940894568690096, + "grad_norm": 0.5405114561331039, + "learning_rate": 9.166007511274278e-09, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.145079106092453, + "step": 4345, + "valid_targets_mean": 5249.0, + "valid_targets_min": 1114 + }, + { + "epoch": 6.94888178913738, + "grad_norm": 0.5207983988424999, + "learning_rate": 6.912721357985597e-09, + "loss": 0.171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15930955111980438, + "step": 4350, + "valid_targets_mean": 4517.9, + "valid_targets_min": 794 + }, + { + "epoch": 6.956869009584665, + "grad_norm": 0.5174766841014377, + "learning_rate": 4.976731855104166e-09, + "loss": 0.1646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13806484639644623, + "step": 4355, + "valid_targets_mean": 4797.9, + "valid_targets_min": 2094 + }, + { + "epoch": 6.964856230031949, + "grad_norm": 0.5135301894652287, + "learning_rate": 3.3580697274016028e-09, + "loss": 0.1723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17942970991134644, + "step": 4360, + "valid_targets_mean": 4651.2, + "valid_targets_min": 1438 + }, + { + "epoch": 6.972843450479234, + "grad_norm": 0.5218654864574037, + "learning_rate": 2.056760663555757e-09, + "loss": 0.1524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1661636233329773, + "step": 4365, + "valid_targets_mean": 4953.9, + "valid_targets_min": 857 + }, + { + "epoch": 6.980830670926517, + "grad_norm": 0.5617892482358111, + "learning_rate": 1.0728253157599178e-09, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16045665740966797, + "step": 4370, + "valid_targets_mean": 3920.1, + "valid_targets_min": 288 + }, + { + "epoch": 6.988817891373802, + "grad_norm": 0.6107885657111256, + "learning_rate": 4.062792993786424e-10, + "loss": 0.1616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16254091262817383, + "step": 4375, + "valid_targets_mean": 3999.9, + "valid_targets_min": 614 + }, + { + "epoch": 6.996805111821086, + "grad_norm": 0.4316133436358314, + "learning_rate": 5.7133192707947705e-11, + "loss": 0.1628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13435956835746765, + "step": 4380, + "valid_targets_mean": 5564.4, + "valid_targets_min": 568 + }, + { + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1903749257326126, + "step": 4382, + "total_flos": 1474549610774528.0, + "train_loss": 0.20471333478722384, + "train_runtime": 24590.8199, + "train_samples_per_second": 2.851, + "train_steps_per_second": 0.178, + "valid_targets_mean": 5778.1, + "valid_targets_min": 3072 + } + ], + "logging_steps": 5, + "max_steps": 4382, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 1500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1474549610774528.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}