diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,6922 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.0, + "eval_steps": 500, + "global_step": 3125, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.008, + "grad_norm": 5.049601892729115, + "learning_rate": 5.111821086261981e-07, + "loss": 0.7278, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43948009610176086, + "step": 5, + "valid_targets_mean": 7209.1, + "valid_targets_min": 1277 + }, + { + "epoch": 0.016, + "grad_norm": 5.277364451906386, + "learning_rate": 1.1501597444089457e-06, + "loss": 0.6908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30664390325546265, + "step": 10, + "valid_targets_mean": 3595.1, + "valid_targets_min": 964 + }, + { + "epoch": 0.024, + "grad_norm": 3.9698434226273944, + "learning_rate": 1.7891373801916933e-06, + "loss": 0.6463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3173980712890625, + "step": 15, + "valid_targets_mean": 4277.0, + "valid_targets_min": 1066 + }, + { + "epoch": 0.032, + "grad_norm": 4.147563403598727, + "learning_rate": 2.428115015974441e-06, + "loss": 0.6988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3903578519821167, + "step": 20, + "valid_targets_mean": 4019.9, + "valid_targets_min": 1172 + }, + { + "epoch": 0.04, + "grad_norm": 2.579151645965659, + "learning_rate": 3.0670926517571885e-06, + "loss": 0.653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23339255154132843, + "step": 25, + "valid_targets_mean": 2710.8, + "valid_targets_min": 854 + }, + { + "epoch": 0.048, + "grad_norm": 1.6363825890362744, + "learning_rate": 3.7060702875399364e-06, + "loss": 0.6401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2710292339324951, + "step": 30, + "valid_targets_mean": 4470.5, + "valid_targets_min": 1069 + }, + { + "epoch": 0.056, + "grad_norm": 1.093070288309869, + "learning_rate": 4.345047923322684e-06, + "loss": 0.5552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2130659818649292, + "step": 35, + "valid_targets_mean": 2896.5, + "valid_targets_min": 757 + }, + { + "epoch": 0.064, + "grad_norm": 0.8596561845629843, + "learning_rate": 4.984025559105431e-06, + "loss": 0.5892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3451421856880188, + "step": 40, + "valid_targets_mean": 6655.2, + "valid_targets_min": 1658 + }, + { + "epoch": 0.072, + "grad_norm": 0.720981615728207, + "learning_rate": 5.623003194888179e-06, + "loss": 0.5639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23500263690948486, + "step": 45, + "valid_targets_mean": 4629.5, + "valid_targets_min": 1128 + }, + { + "epoch": 0.08, + "grad_norm": 0.7648020352559365, + "learning_rate": 6.261980830670928e-06, + "loss": 0.546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28623056411743164, + "step": 50, + "valid_targets_mean": 4455.5, + "valid_targets_min": 557 + }, + { + "epoch": 0.088, + "grad_norm": 0.8070597567729144, + "learning_rate": 6.900958466453675e-06, + "loss": 0.5496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21983392536640167, + "step": 55, + "valid_targets_mean": 2054.8, + "valid_targets_min": 867 + }, + { + "epoch": 0.096, + "grad_norm": 0.5770805999001849, + "learning_rate": 7.5399361022364225e-06, + "loss": 0.5428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2292506992816925, + "step": 60, + "valid_targets_mean": 4125.5, + "valid_targets_min": 699 + }, + { + "epoch": 0.104, + "grad_norm": 0.5971913839474638, + "learning_rate": 8.17891373801917e-06, + "loss": 0.4938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20313440263271332, + "step": 65, + "valid_targets_mean": 3079.9, + "valid_targets_min": 756 + }, + { + "epoch": 0.112, + "grad_norm": 0.686250956100844, + "learning_rate": 8.817891373801917e-06, + "loss": 0.5128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2895665466785431, + "step": 70, + "valid_targets_mean": 2778.6, + "valid_targets_min": 667 + }, + { + "epoch": 0.12, + "grad_norm": 0.6575289237390317, + "learning_rate": 9.456869009584665e-06, + "loss": 0.5072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24841192364692688, + "step": 75, + "valid_targets_mean": 3479.6, + "valid_targets_min": 661 + }, + { + "epoch": 0.128, + "grad_norm": 0.6049593114695113, + "learning_rate": 1.0095846645367413e-05, + "loss": 0.4851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2605184018611908, + "step": 80, + "valid_targets_mean": 3057.5, + "valid_targets_min": 1042 + }, + { + "epoch": 0.136, + "grad_norm": 0.594174930790963, + "learning_rate": 1.073482428115016e-05, + "loss": 0.475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12189821153879166, + "step": 85, + "valid_targets_mean": 2004.9, + "valid_targets_min": 849 + }, + { + "epoch": 0.144, + "grad_norm": 0.6352082822362639, + "learning_rate": 1.1373801916932907e-05, + "loss": 0.4988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3049646317958832, + "step": 90, + "valid_targets_mean": 3373.8, + "valid_targets_min": 1120 + }, + { + "epoch": 0.152, + "grad_norm": 0.6122762651129019, + "learning_rate": 1.2012779552715656e-05, + "loss": 0.5111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32920581102371216, + "step": 95, + "valid_targets_mean": 3691.1, + "valid_targets_min": 840 + }, + { + "epoch": 0.16, + "grad_norm": 0.4919427231068009, + "learning_rate": 1.2651757188498404e-05, + "loss": 0.4916, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16588306427001953, + "step": 100, + "valid_targets_mean": 4110.8, + "valid_targets_min": 665 + }, + { + "epoch": 0.168, + "grad_norm": 0.567123357639076, + "learning_rate": 1.329073482428115e-05, + "loss": 0.5458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2980887293815613, + "step": 105, + "valid_targets_mean": 4432.4, + "valid_targets_min": 716 + }, + { + "epoch": 0.176, + "grad_norm": 0.5563571872329078, + "learning_rate": 1.39297124600639e-05, + "loss": 0.4754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30753272771835327, + "step": 110, + "valid_targets_mean": 5548.8, + "valid_targets_min": 1114 + }, + { + "epoch": 0.184, + "grad_norm": 0.5033412953896856, + "learning_rate": 1.4568690095846648e-05, + "loss": 0.4713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19983811676502228, + "step": 115, + "valid_targets_mean": 3808.8, + "valid_targets_min": 1071 + }, + { + "epoch": 0.192, + "grad_norm": 0.5542569365948462, + "learning_rate": 1.5207667731629394e-05, + "loss": 0.4201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1442127823829651, + "step": 120, + "valid_targets_mean": 1944.8, + "valid_targets_min": 727 + }, + { + "epoch": 0.2, + "grad_norm": 0.5369920791073214, + "learning_rate": 1.584664536741214e-05, + "loss": 0.4804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21610140800476074, + "step": 125, + "valid_targets_mean": 4456.5, + "valid_targets_min": 938 + }, + { + "epoch": 0.208, + "grad_norm": 0.5934402873809017, + "learning_rate": 1.648562300319489e-05, + "loss": 0.4488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21678262948989868, + "step": 130, + "valid_targets_mean": 2704.4, + "valid_targets_min": 1028 + }, + { + "epoch": 0.216, + "grad_norm": 0.6167459028653418, + "learning_rate": 1.712460063897764e-05, + "loss": 0.4635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26390573382377625, + "step": 135, + "valid_targets_mean": 3041.1, + "valid_targets_min": 690 + }, + { + "epoch": 0.224, + "grad_norm": 0.6057571170803768, + "learning_rate": 1.7763578274760385e-05, + "loss": 0.473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2703923285007477, + "step": 140, + "valid_targets_mean": 3405.5, + "valid_targets_min": 1388 + }, + { + "epoch": 0.232, + "grad_norm": 0.5964087448191424, + "learning_rate": 1.840255591054313e-05, + "loss": 0.4281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12820664048194885, + "step": 145, + "valid_targets_mean": 2341.5, + "valid_targets_min": 548 + }, + { + "epoch": 0.24, + "grad_norm": 0.7944123967649483, + "learning_rate": 1.904153354632588e-05, + "loss": 0.4823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3033648133277893, + "step": 150, + "valid_targets_mean": 2520.9, + "valid_targets_min": 945 + }, + { + "epoch": 0.248, + "grad_norm": 0.6692216622848669, + "learning_rate": 1.9680511182108627e-05, + "loss": 0.4764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.352511465549469, + "step": 155, + "valid_targets_mean": 3567.8, + "valid_targets_min": 1011 + }, + { + "epoch": 0.256, + "grad_norm": 0.5976706030380227, + "learning_rate": 2.0319488817891376e-05, + "loss": 0.4391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25001269578933716, + "step": 160, + "valid_targets_mean": 3181.8, + "valid_targets_min": 1389 + }, + { + "epoch": 0.264, + "grad_norm": 1.0239772828743636, + "learning_rate": 2.0958466453674126e-05, + "loss": 0.446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16786524653434753, + "step": 165, + "valid_targets_mean": 3835.6, + "valid_targets_min": 1012 + }, + { + "epoch": 0.272, + "grad_norm": 0.42177417682653895, + "learning_rate": 2.1597444089456872e-05, + "loss": 0.4304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19490358233451843, + "step": 170, + "valid_targets_mean": 6332.6, + "valid_targets_min": 948 + }, + { + "epoch": 0.28, + "grad_norm": 0.5895166731709086, + "learning_rate": 2.2236421725239618e-05, + "loss": 0.465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22322815656661987, + "step": 175, + "valid_targets_mean": 3828.6, + "valid_targets_min": 947 + }, + { + "epoch": 0.288, + "grad_norm": 0.6030927972832155, + "learning_rate": 2.2875399361022364e-05, + "loss": 0.3898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1875823736190796, + "step": 180, + "valid_targets_mean": 3343.1, + "valid_targets_min": 790 + }, + { + "epoch": 0.296, + "grad_norm": 0.45033732473663274, + "learning_rate": 2.3514376996805114e-05, + "loss": 0.4055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16341853141784668, + "step": 185, + "valid_targets_mean": 4346.8, + "valid_targets_min": 621 + }, + { + "epoch": 0.304, + "grad_norm": 0.6410357135895351, + "learning_rate": 2.415335463258786e-05, + "loss": 0.451, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2382049858570099, + "step": 190, + "valid_targets_mean": 2598.9, + "valid_targets_min": 869 + }, + { + "epoch": 0.312, + "grad_norm": 0.40862470776459403, + "learning_rate": 2.4792332268370606e-05, + "loss": 0.4398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19033300876617432, + "step": 195, + "valid_targets_mean": 8101.2, + "valid_targets_min": 1425 + }, + { + "epoch": 0.32, + "grad_norm": 0.4639247345136467, + "learning_rate": 2.543130990415336e-05, + "loss": 0.4575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1774352490901947, + "step": 200, + "valid_targets_mean": 3866.1, + "valid_targets_min": 1002 + }, + { + "epoch": 0.328, + "grad_norm": 0.6045432758950621, + "learning_rate": 2.6070287539936105e-05, + "loss": 0.4629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20918956398963928, + "step": 205, + "valid_targets_mean": 4064.6, + "valid_targets_min": 907 + }, + { + "epoch": 0.336, + "grad_norm": 0.49705632653844295, + "learning_rate": 2.670926517571885e-05, + "loss": 0.4128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20427216589450836, + "step": 210, + "valid_targets_mean": 5069.9, + "valid_targets_min": 701 + }, + { + "epoch": 0.344, + "grad_norm": 0.48520489500855396, + "learning_rate": 2.73482428115016e-05, + "loss": 0.4252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34422576427459717, + "step": 215, + "valid_targets_mean": 7910.2, + "valid_targets_min": 1506 + }, + { + "epoch": 0.352, + "grad_norm": 0.4668215278946658, + "learning_rate": 2.7987220447284347e-05, + "loss": 0.4237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21643120050430298, + "step": 220, + "valid_targets_mean": 5393.4, + "valid_targets_min": 737 + }, + { + "epoch": 0.36, + "grad_norm": 0.5943792588027108, + "learning_rate": 2.8626198083067093e-05, + "loss": 0.4247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13178721070289612, + "step": 225, + "valid_targets_mean": 2101.8, + "valid_targets_min": 858 + }, + { + "epoch": 0.368, + "grad_norm": 0.5517991295062519, + "learning_rate": 2.9265175718849843e-05, + "loss": 0.3864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14202260971069336, + "step": 230, + "valid_targets_mean": 2987.0, + "valid_targets_min": 814 + }, + { + "epoch": 0.376, + "grad_norm": 0.6080233680136686, + "learning_rate": 2.9904153354632592e-05, + "loss": 0.4359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2212154120206833, + "step": 235, + "valid_targets_mean": 3522.0, + "valid_targets_min": 1157 + }, + { + "epoch": 0.384, + "grad_norm": 0.7001104625755565, + "learning_rate": 3.054313099041534e-05, + "loss": 0.4193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32441866397857666, + "step": 240, + "valid_targets_mean": 3398.5, + "valid_targets_min": 700 + }, + { + "epoch": 0.392, + "grad_norm": 0.5240312255698709, + "learning_rate": 3.1182108626198084e-05, + "loss": 0.4328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23227915167808533, + "step": 245, + "valid_targets_mean": 4712.2, + "valid_targets_min": 666 + }, + { + "epoch": 0.4, + "grad_norm": 0.8648496001859342, + "learning_rate": 3.1821086261980834e-05, + "loss": 0.4262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19384440779685974, + "step": 250, + "valid_targets_mean": 1589.8, + "valid_targets_min": 634 + }, + { + "epoch": 0.408, + "grad_norm": 0.5041946092525469, + "learning_rate": 3.246006389776358e-05, + "loss": 0.3766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23754949867725372, + "step": 255, + "valid_targets_mean": 5545.6, + "valid_targets_min": 697 + }, + { + "epoch": 0.416, + "grad_norm": 0.741812736773093, + "learning_rate": 3.3099041533546326e-05, + "loss": 0.4342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19546037912368774, + "step": 260, + "valid_targets_mean": 1842.6, + "valid_targets_min": 753 + }, + { + "epoch": 0.424, + "grad_norm": 0.6249635080823627, + "learning_rate": 3.3738019169329076e-05, + "loss": 0.3958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3015894591808319, + "step": 265, + "valid_targets_mean": 3354.1, + "valid_targets_min": 688 + }, + { + "epoch": 0.432, + "grad_norm": 0.5157995715206686, + "learning_rate": 3.4376996805111825e-05, + "loss": 0.4649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23236235976219177, + "step": 270, + "valid_targets_mean": 4596.9, + "valid_targets_min": 1140 + }, + { + "epoch": 0.44, + "grad_norm": 0.6481359309360338, + "learning_rate": 3.5015974440894575e-05, + "loss": 0.4068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1774221956729889, + "step": 275, + "valid_targets_mean": 2685.8, + "valid_targets_min": 448 + }, + { + "epoch": 0.448, + "grad_norm": 0.5003694452381771, + "learning_rate": 3.565495207667732e-05, + "loss": 0.3946, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20578652620315552, + "step": 280, + "valid_targets_mean": 5089.4, + "valid_targets_min": 1146 + }, + { + "epoch": 0.456, + "grad_norm": 0.5430128250712549, + "learning_rate": 3.629392971246007e-05, + "loss": 0.4262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19618654251098633, + "step": 285, + "valid_targets_mean": 2835.6, + "valid_targets_min": 1099 + }, + { + "epoch": 0.464, + "grad_norm": 0.5889443716013671, + "learning_rate": 3.6932907348242816e-05, + "loss": 0.4326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26432836055755615, + "step": 290, + "valid_targets_mean": 4703.8, + "valid_targets_min": 1265 + }, + { + "epoch": 0.472, + "grad_norm": 0.5008891357690028, + "learning_rate": 3.757188498402556e-05, + "loss": 0.4477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28413641452789307, + "step": 295, + "valid_targets_mean": 6156.9, + "valid_targets_min": 1117 + }, + { + "epoch": 0.48, + "grad_norm": 0.45277012879942824, + "learning_rate": 3.821086261980831e-05, + "loss": 0.4005, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2018493264913559, + "step": 300, + "valid_targets_mean": 6251.2, + "valid_targets_min": 699 + }, + { + "epoch": 0.488, + "grad_norm": 0.5498771344672786, + "learning_rate": 3.884984025559106e-05, + "loss": 0.4067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20270295441150665, + "step": 305, + "valid_targets_mean": 3713.6, + "valid_targets_min": 426 + }, + { + "epoch": 0.496, + "grad_norm": 0.4849980047118573, + "learning_rate": 3.94888178913738e-05, + "loss": 0.4172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19320783019065857, + "step": 310, + "valid_targets_mean": 3616.2, + "valid_targets_min": 1182 + }, + { + "epoch": 0.504, + "grad_norm": 0.46547683956214736, + "learning_rate": 3.9999987518434296e-05, + "loss": 0.4113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16570322215557098, + "step": 315, + "valid_targets_mean": 4545.1, + "valid_targets_min": 894 + }, + { + "epoch": 0.512, + "grad_norm": 0.6433873513411175, + "learning_rate": 3.999955066527015e-05, + "loss": 0.4134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2329372763633728, + "step": 320, + "valid_targets_mean": 3225.2, + "valid_targets_min": 1171 + }, + { + "epoch": 0.52, + "grad_norm": 0.48867055877170756, + "learning_rate": 3.999848974939926e-05, + "loss": 0.3981, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11635329574346542, + "step": 325, + "valid_targets_mean": 3431.9, + "valid_targets_min": 713 + }, + { + "epoch": 0.528, + "grad_norm": 1.4894122572179855, + "learning_rate": 3.999680480392626e-05, + "loss": 0.4087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18833759427070618, + "step": 330, + "valid_targets_mean": 3035.5, + "valid_targets_min": 732 + }, + { + "epoch": 0.536, + "grad_norm": 0.4193239467986382, + "learning_rate": 3.999449588142792e-05, + "loss": 0.4141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20873858034610748, + "step": 335, + "valid_targets_mean": 5798.9, + "valid_targets_min": 2283 + }, + { + "epoch": 0.544, + "grad_norm": 0.4322322156645337, + "learning_rate": 3.9991563053951476e-05, + "loss": 0.3762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15418782830238342, + "step": 340, + "valid_targets_mean": 4867.1, + "valid_targets_min": 913 + }, + { + "epoch": 0.552, + "grad_norm": 0.42579176413360825, + "learning_rate": 3.99880064130124e-05, + "loss": 0.3795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18619948625564575, + "step": 345, + "valid_targets_mean": 5330.1, + "valid_targets_min": 676 + }, + { + "epoch": 0.56, + "grad_norm": 0.5161509309473619, + "learning_rate": 3.9983826069591535e-05, + "loss": 0.4151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1781724989414215, + "step": 350, + "valid_targets_mean": 3472.8, + "valid_targets_min": 997 + }, + { + "epoch": 0.568, + "grad_norm": 0.6041811404254598, + "learning_rate": 3.997902215413163e-05, + "loss": 0.3963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30378806591033936, + "step": 355, + "valid_targets_mean": 3981.2, + "valid_targets_min": 912 + }, + { + "epoch": 0.576, + "grad_norm": 0.6498872075998322, + "learning_rate": 3.997359481653327e-05, + "loss": 0.407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20173442363739014, + "step": 360, + "valid_targets_mean": 2788.2, + "valid_targets_min": 878 + }, + { + "epoch": 0.584, + "grad_norm": 0.5061612683016131, + "learning_rate": 3.996754422615023e-05, + "loss": 0.3623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22666072845458984, + "step": 365, + "valid_targets_mean": 4118.0, + "valid_targets_min": 1373 + }, + { + "epoch": 0.592, + "grad_norm": 0.5361445261038215, + "learning_rate": 3.996087057178411e-05, + "loss": 0.4507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19703161716461182, + "step": 370, + "valid_targets_mean": 3619.8, + "valid_targets_min": 582 + }, + { + "epoch": 0.6, + "grad_norm": 0.672527479458296, + "learning_rate": 3.995357406167856e-05, + "loss": 0.4195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2972131371498108, + "step": 375, + "valid_targets_mean": 3516.8, + "valid_targets_min": 874 + }, + { + "epoch": 0.608, + "grad_norm": 0.44549432799665417, + "learning_rate": 3.994565492351267e-05, + "loss": 0.4056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17287719249725342, + "step": 380, + "valid_targets_mean": 5475.5, + "valid_targets_min": 1842 + }, + { + "epoch": 0.616, + "grad_norm": 0.6778440222156983, + "learning_rate": 3.993711340439394e-05, + "loss": 0.4242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21647445857524872, + "step": 385, + "valid_targets_mean": 3971.5, + "valid_targets_min": 1585 + }, + { + "epoch": 0.624, + "grad_norm": 0.4459566145405042, + "learning_rate": 3.9927949770850535e-05, + "loss": 0.3925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1151498481631279, + "step": 390, + "valid_targets_mean": 2951.2, + "valid_targets_min": 1088 + }, + { + "epoch": 0.632, + "grad_norm": 0.48331678470692757, + "learning_rate": 3.991816430882297e-05, + "loss": 0.3602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2142452448606491, + "step": 395, + "valid_targets_mean": 4534.9, + "valid_targets_min": 711 + }, + { + "epoch": 0.64, + "grad_norm": 0.5127689050837395, + "learning_rate": 3.9907757323655206e-05, + "loss": 0.4035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23935633897781372, + "step": 400, + "valid_targets_mean": 4701.5, + "valid_targets_min": 598 + }, + { + "epoch": 0.648, + "grad_norm": 0.4843857269179256, + "learning_rate": 3.98967291400851e-05, + "loss": 0.3784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25847354531288147, + "step": 405, + "valid_targets_mean": 5699.5, + "valid_targets_min": 879 + }, + { + "epoch": 0.656, + "grad_norm": 0.5005998339687705, + "learning_rate": 3.98850801022343e-05, + "loss": 0.3906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1377241611480713, + "step": 410, + "valid_targets_mean": 2504.0, + "valid_targets_min": 740 + }, + { + "epoch": 0.664, + "grad_norm": 0.4717110410055623, + "learning_rate": 3.987281057359746e-05, + "loss": 0.4076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20272189378738403, + "step": 415, + "valid_targets_mean": 5388.0, + "valid_targets_min": 1322 + }, + { + "epoch": 0.672, + "grad_norm": 0.5915926837830854, + "learning_rate": 3.985992093703096e-05, + "loss": 0.4205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16994090378284454, + "step": 420, + "valid_targets_mean": 2121.0, + "valid_targets_min": 784 + }, + { + "epoch": 0.68, + "grad_norm": 0.43255266985931967, + "learning_rate": 3.98464115947409e-05, + "loss": 0.3954, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1824256181716919, + "step": 425, + "valid_targets_mean": 5472.2, + "valid_targets_min": 1140 + }, + { + "epoch": 0.688, + "grad_norm": 0.49054534785016624, + "learning_rate": 3.9832282968270595e-05, + "loss": 0.4021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1873103380203247, + "step": 430, + "valid_targets_mean": 3608.4, + "valid_targets_min": 735 + }, + { + "epoch": 0.696, + "grad_norm": 0.5313053332029032, + "learning_rate": 3.9817535498487385e-05, + "loss": 0.4255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2174983024597168, + "step": 435, + "valid_targets_mean": 3903.1, + "valid_targets_min": 773 + }, + { + "epoch": 0.704, + "grad_norm": 0.5660976015817243, + "learning_rate": 3.980216964556892e-05, + "loss": 0.4006, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2702789306640625, + "step": 440, + "valid_targets_mean": 5340.2, + "valid_targets_min": 2464 + }, + { + "epoch": 0.712, + "grad_norm": 0.3583060517799168, + "learning_rate": 3.978618588898873e-05, + "loss": 0.3687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17784491181373596, + "step": 445, + "valid_targets_mean": 5914.0, + "valid_targets_min": 1108 + }, + { + "epoch": 0.72, + "grad_norm": 0.5085712449981009, + "learning_rate": 3.976958472750137e-05, + "loss": 0.415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17204846441745758, + "step": 450, + "valid_targets_mean": 2789.6, + "valid_targets_min": 1260 + }, + { + "epoch": 0.728, + "grad_norm": 0.5391324690732857, + "learning_rate": 3.9752366679126754e-05, + "loss": 0.4117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23437952995300293, + "step": 455, + "valid_targets_mean": 3562.9, + "valid_targets_min": 952 + }, + { + "epoch": 0.736, + "grad_norm": 0.5567986016900565, + "learning_rate": 3.973453228113405e-05, + "loss": 0.4096, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22452281415462494, + "step": 460, + "valid_targets_mean": 3349.6, + "valid_targets_min": 623 + }, + { + "epoch": 0.744, + "grad_norm": 0.5058714202986386, + "learning_rate": 3.971608209002489e-05, + "loss": 0.4383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26416462659835815, + "step": 465, + "valid_targets_mean": 4220.1, + "valid_targets_min": 1122 + }, + { + "epoch": 0.752, + "grad_norm": 0.5026494486558096, + "learning_rate": 3.969701668151603e-05, + "loss": 0.3986, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16783612966537476, + "step": 470, + "valid_targets_mean": 3548.6, + "valid_targets_min": 787 + }, + { + "epoch": 0.76, + "grad_norm": 0.5620692640215499, + "learning_rate": 3.9677336650521336e-05, + "loss": 0.3936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23793232440948486, + "step": 475, + "valid_targets_mean": 4302.1, + "valid_targets_min": 857 + }, + { + "epoch": 0.768, + "grad_norm": 0.5022492717127841, + "learning_rate": 3.9657042611133294e-05, + "loss": 0.4374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27741798758506775, + "step": 480, + "valid_targets_mean": 5250.4, + "valid_targets_min": 1286 + }, + { + "epoch": 0.776, + "grad_norm": 0.46269173482413795, + "learning_rate": 3.963613519660379e-05, + "loss": 0.4168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2027072012424469, + "step": 485, + "valid_targets_mean": 5004.6, + "valid_targets_min": 613 + }, + { + "epoch": 0.784, + "grad_norm": 0.5741596915736422, + "learning_rate": 3.961461505932435e-05, + "loss": 0.4089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20953507721424103, + "step": 490, + "valid_targets_mean": 4452.9, + "valid_targets_min": 872 + }, + { + "epoch": 0.792, + "grad_norm": 0.4907391959582943, + "learning_rate": 3.959248287080583e-05, + "loss": 0.4368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16955000162124634, + "step": 495, + "valid_targets_mean": 3555.2, + "valid_targets_min": 371 + }, + { + "epoch": 0.8, + "grad_norm": 0.6031193676144202, + "learning_rate": 3.9569739321657416e-05, + "loss": 0.3877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1926141083240509, + "step": 500, + "valid_targets_mean": 2296.8, + "valid_targets_min": 895 + }, + { + "epoch": 0.808, + "grad_norm": 0.5937397695685496, + "learning_rate": 3.9546385121565095e-05, + "loss": 0.407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2059236317873001, + "step": 505, + "valid_targets_mean": 2328.5, + "valid_targets_min": 875 + }, + { + "epoch": 0.816, + "grad_norm": 0.44462295687513187, + "learning_rate": 3.952242099926951e-05, + "loss": 0.39, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15669041872024536, + "step": 510, + "valid_targets_mean": 4059.2, + "valid_targets_min": 896 + }, + { + "epoch": 0.824, + "grad_norm": 0.4975667663636538, + "learning_rate": 3.9497847702543196e-05, + "loss": 0.4132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2388497292995453, + "step": 515, + "valid_targets_mean": 4820.4, + "valid_targets_min": 797 + }, + { + "epoch": 0.832, + "grad_norm": 0.5711830488685984, + "learning_rate": 3.94726659981673e-05, + "loss": 0.4272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1668613851070404, + "step": 520, + "valid_targets_mean": 2210.6, + "valid_targets_min": 1146 + }, + { + "epoch": 0.84, + "grad_norm": 0.5110974517075352, + "learning_rate": 3.94468766719076e-05, + "loss": 0.3883, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15008708834648132, + "step": 525, + "valid_targets_mean": 2796.0, + "valid_targets_min": 706 + }, + { + "epoch": 0.848, + "grad_norm": 0.6082548647418597, + "learning_rate": 3.942048052849001e-05, + "loss": 0.4015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20496749877929688, + "step": 530, + "valid_targets_mean": 3376.1, + "valid_targets_min": 727 + }, + { + "epoch": 0.856, + "grad_norm": 0.5881521119749641, + "learning_rate": 3.939347839157548e-05, + "loss": 0.4042, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1840178519487381, + "step": 535, + "valid_targets_mean": 3227.4, + "valid_targets_min": 523 + }, + { + "epoch": 0.864, + "grad_norm": 0.40438864628986093, + "learning_rate": 3.9365871103734264e-05, + "loss": 0.3748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14281845092773438, + "step": 540, + "valid_targets_mean": 4520.6, + "valid_targets_min": 1131 + }, + { + "epoch": 0.872, + "grad_norm": 0.43619871599488097, + "learning_rate": 3.933765952641965e-05, + "loss": 0.4081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11543935537338257, + "step": 545, + "valid_targets_mean": 2299.2, + "valid_targets_min": 847 + }, + { + "epoch": 0.88, + "grad_norm": 0.6284680735434611, + "learning_rate": 3.930884453994109e-05, + "loss": 0.3934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2154695689678192, + "step": 550, + "valid_targets_mean": 2253.1, + "valid_targets_min": 1014 + }, + { + "epoch": 0.888, + "grad_norm": 0.534153789789492, + "learning_rate": 3.9279427043436706e-05, + "loss": 0.4357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16836689412593842, + "step": 555, + "valid_targets_mean": 2522.4, + "valid_targets_min": 866 + }, + { + "epoch": 0.896, + "grad_norm": 0.42996929538088907, + "learning_rate": 3.924940795484525e-05, + "loss": 0.3961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1445537656545639, + "step": 560, + "valid_targets_mean": 3909.5, + "valid_targets_min": 779 + }, + { + "epoch": 0.904, + "grad_norm": 0.5487891975426903, + "learning_rate": 3.9218788210877436e-05, + "loss": 0.4047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17581608891487122, + "step": 565, + "valid_targets_mean": 2792.2, + "valid_targets_min": 780 + }, + { + "epoch": 0.912, + "grad_norm": 0.8309295578756681, + "learning_rate": 3.918756876698676e-05, + "loss": 0.4498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24004212021827698, + "step": 570, + "valid_targets_mean": 2094.1, + "valid_targets_min": 800 + }, + { + "epoch": 0.92, + "grad_norm": 0.5731658303042831, + "learning_rate": 3.9155750597339634e-05, + "loss": 0.4248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23413121700286865, + "step": 575, + "valid_targets_mean": 2952.8, + "valid_targets_min": 740 + }, + { + "epoch": 0.928, + "grad_norm": 0.6857186460842621, + "learning_rate": 3.912333469478502e-05, + "loss": 0.4148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24854609370231628, + "step": 580, + "valid_targets_mean": 3259.5, + "valid_targets_min": 1202 + }, + { + "epoch": 0.936, + "grad_norm": 0.46414179194452654, + "learning_rate": 3.909032207082344e-05, + "loss": 0.3897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19742190837860107, + "step": 585, + "valid_targets_mean": 4621.4, + "valid_targets_min": 1137 + }, + { + "epoch": 0.944, + "grad_norm": 0.5432959658650993, + "learning_rate": 3.90567137555754e-05, + "loss": 0.3952, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18072067201137543, + "step": 590, + "valid_targets_mean": 2764.4, + "valid_targets_min": 610 + }, + { + "epoch": 0.952, + "grad_norm": 0.513616096435003, + "learning_rate": 3.9022510797749286e-05, + "loss": 0.4508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28625935316085815, + "step": 595, + "valid_targets_mean": 4943.6, + "valid_targets_min": 1006 + }, + { + "epoch": 0.96, + "grad_norm": 0.4135402289886326, + "learning_rate": 3.898771426460859e-05, + "loss": 0.3864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1969718635082245, + "step": 600, + "valid_targets_mean": 4979.9, + "valid_targets_min": 971 + }, + { + "epoch": 0.968, + "grad_norm": 0.42359538658922197, + "learning_rate": 3.8952325241938635e-05, + "loss": 0.4183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1678941547870636, + "step": 605, + "valid_targets_mean": 4025.1, + "valid_targets_min": 1413 + }, + { + "epoch": 0.976, + "grad_norm": 0.437069204077118, + "learning_rate": 3.8916344834012695e-05, + "loss": 0.3807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18256065249443054, + "step": 610, + "valid_targets_mean": 4092.8, + "valid_targets_min": 722 + }, + { + "epoch": 0.984, + "grad_norm": 0.3981270298976202, + "learning_rate": 3.887977416355754e-05, + "loss": 0.3837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.213514506816864, + "step": 615, + "valid_targets_mean": 5298.8, + "valid_targets_min": 1094 + }, + { + "epoch": 0.992, + "grad_norm": 0.39948471020655046, + "learning_rate": 3.884261437171838e-05, + "loss": 0.3919, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18268711864948273, + "step": 620, + "valid_targets_mean": 5262.4, + "valid_targets_min": 771 + }, + { + "epoch": 1.0, + "grad_norm": 0.4218730892895318, + "learning_rate": 3.8804866618023284e-05, + "loss": 0.3663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2073502242565155, + "step": 625, + "valid_targets_mean": 6279.8, + "valid_targets_min": 665 + }, + { + "epoch": 1.008, + "grad_norm": 0.4006188688474502, + "learning_rate": 3.876653208034698e-05, + "loss": 0.375, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15046511590480804, + "step": 630, + "valid_targets_mean": 4694.0, + "valid_targets_min": 572 + }, + { + "epoch": 1.016, + "grad_norm": 0.5023669209376572, + "learning_rate": 3.8727611954874114e-05, + "loss": 0.4108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19494599103927612, + "step": 635, + "valid_targets_mean": 4312.2, + "valid_targets_min": 1605 + }, + { + "epoch": 1.024, + "grad_norm": 0.5415179452211821, + "learning_rate": 3.8688107456061904e-05, + "loss": 0.3649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16564016044139862, + "step": 640, + "valid_targets_mean": 5437.8, + "valid_targets_min": 950 + }, + { + "epoch": 1.032, + "grad_norm": 0.5250999668731369, + "learning_rate": 3.864801981660227e-05, + "loss": 0.3787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16773557662963867, + "step": 645, + "valid_targets_mean": 2591.0, + "valid_targets_min": 896 + }, + { + "epoch": 1.04, + "grad_norm": 0.4792499299558008, + "learning_rate": 3.860735028738337e-05, + "loss": 0.3879, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11868780851364136, + "step": 650, + "valid_targets_mean": 2843.8, + "valid_targets_min": 667 + }, + { + "epoch": 1.048, + "grad_norm": 0.5931956749749214, + "learning_rate": 3.856610013745051e-05, + "loss": 0.3869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17905378341674805, + "step": 655, + "valid_targets_mean": 2138.0, + "valid_targets_min": 811 + }, + { + "epoch": 1.056, + "grad_norm": 0.5070089472539886, + "learning_rate": 3.852427065396665e-05, + "loss": 0.3597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21618938446044922, + "step": 660, + "valid_targets_mean": 3818.6, + "valid_targets_min": 874 + }, + { + "epoch": 1.064, + "grad_norm": 0.5277258256022489, + "learning_rate": 3.848186314217213e-05, + "loss": 0.3832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20905262231826782, + "step": 665, + "valid_targets_mean": 5543.8, + "valid_targets_min": 1842 + }, + { + "epoch": 1.072, + "grad_norm": 0.45992756895656156, + "learning_rate": 3.843887892534402e-05, + "loss": 0.3628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14996416866779327, + "step": 670, + "valid_targets_mean": 3032.8, + "valid_targets_min": 865 + }, + { + "epoch": 1.08, + "grad_norm": 0.43582299518949036, + "learning_rate": 3.8395319344754776e-05, + "loss": 0.3695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14619383215904236, + "step": 675, + "valid_targets_mean": 4816.9, + "valid_targets_min": 678 + }, + { + "epoch": 1.088, + "grad_norm": 0.47992188401849384, + "learning_rate": 3.8351185759630435e-05, + "loss": 0.3989, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17877763509750366, + "step": 680, + "valid_targets_mean": 4263.5, + "valid_targets_min": 586 + }, + { + "epoch": 1.096, + "grad_norm": 0.5853943377609927, + "learning_rate": 3.830647954710816e-05, + "loss": 0.3652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09830217808485031, + "step": 685, + "valid_targets_mean": 1025.2, + "valid_targets_min": 527 + }, + { + "epoch": 1.104, + "grad_norm": 0.5453115260739528, + "learning_rate": 3.826120210219331e-05, + "loss": 0.4072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19349700212478638, + "step": 690, + "valid_targets_mean": 3093.0, + "valid_targets_min": 754 + }, + { + "epoch": 1.112, + "grad_norm": 0.37877838490425225, + "learning_rate": 3.8215354837715836e-05, + "loss": 0.3834, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12431143969297409, + "step": 695, + "valid_targets_mean": 5127.8, + "valid_targets_min": 707 + }, + { + "epoch": 1.12, + "grad_norm": 0.6610820231714544, + "learning_rate": 3.816893918428631e-05, + "loss": 0.3786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18000784516334534, + "step": 700, + "valid_targets_mean": 2055.1, + "valid_targets_min": 688 + }, + { + "epoch": 1.1280000000000001, + "grad_norm": 0.5300125572424695, + "learning_rate": 3.8121956590251153e-05, + "loss": 0.4069, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1879684329032898, + "step": 705, + "valid_targets_mean": 3468.1, + "valid_targets_min": 786 + }, + { + "epoch": 1.1360000000000001, + "grad_norm": 0.44683055917689773, + "learning_rate": 3.8074408521647576e-05, + "loss": 0.3836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20903831720352173, + "step": 710, + "valid_targets_mean": 5525.9, + "valid_targets_min": 422 + }, + { + "epoch": 1.144, + "grad_norm": 0.5102572350380875, + "learning_rate": 3.802629646215771e-05, + "loss": 0.3792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17337819933891296, + "step": 715, + "valid_targets_mean": 4684.0, + "valid_targets_min": 1435 + }, + { + "epoch": 1.152, + "grad_norm": 0.5765523983200641, + "learning_rate": 3.79776219130624e-05, + "loss": 0.3577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1805480420589447, + "step": 720, + "valid_targets_mean": 2605.1, + "valid_targets_min": 1231 + }, + { + "epoch": 1.16, + "grad_norm": 0.4897830077820699, + "learning_rate": 3.792838639319431e-05, + "loss": 0.3629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19262930750846863, + "step": 725, + "valid_targets_mean": 3851.5, + "valid_targets_min": 1736 + }, + { + "epoch": 1.168, + "grad_norm": 0.4322207849895438, + "learning_rate": 3.787859143889054e-05, + "loss": 0.3539, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23677626252174377, + "step": 730, + "valid_targets_mean": 6349.4, + "valid_targets_min": 1935 + }, + { + "epoch": 1.176, + "grad_norm": 0.5128262395764999, + "learning_rate": 3.782823860394469e-05, + "loss": 0.3568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20344534516334534, + "step": 735, + "valid_targets_mean": 3306.5, + "valid_targets_min": 1146 + }, + { + "epoch": 1.184, + "grad_norm": 0.574030810014404, + "learning_rate": 3.777732945955841e-05, + "loss": 0.39, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2069251835346222, + "step": 740, + "valid_targets_mean": 3683.5, + "valid_targets_min": 823 + }, + { + "epoch": 1.192, + "grad_norm": 0.5501142652379903, + "learning_rate": 3.772586559429229e-05, + "loss": 0.359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26781395077705383, + "step": 745, + "valid_targets_mean": 4816.9, + "valid_targets_min": 1250 + }, + { + "epoch": 1.2, + "grad_norm": 0.4653456781818237, + "learning_rate": 3.767384861401636e-05, + "loss": 0.3919, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1659979671239853, + "step": 750, + "valid_targets_mean": 4349.2, + "valid_targets_min": 847 + }, + { + "epoch": 1.208, + "grad_norm": 0.38274296561169047, + "learning_rate": 3.762128014185998e-05, + "loss": 0.3675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2027568817138672, + "step": 755, + "valid_targets_mean": 7722.0, + "valid_targets_min": 1045 + }, + { + "epoch": 1.216, + "grad_norm": 0.9407563287769353, + "learning_rate": 3.7568161818161135e-05, + "loss": 0.3896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1349801868200302, + "step": 760, + "valid_targets_mean": 2931.2, + "valid_targets_min": 690 + }, + { + "epoch": 1.224, + "grad_norm": 0.5295635911819686, + "learning_rate": 3.751449530041532e-05, + "loss": 0.3771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17271582782268524, + "step": 765, + "valid_targets_mean": 3066.4, + "valid_targets_min": 422 + }, + { + "epoch": 1.232, + "grad_norm": 0.543101523869541, + "learning_rate": 3.7460282263223764e-05, + "loss": 0.3767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17287389934062958, + "step": 770, + "valid_targets_mean": 2902.2, + "valid_targets_min": 1035 + }, + { + "epoch": 1.24, + "grad_norm": 0.5204988467550381, + "learning_rate": 3.740552439824122e-05, + "loss": 0.3991, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19387352466583252, + "step": 775, + "valid_targets_mean": 3014.8, + "valid_targets_min": 789 + }, + { + "epoch": 1.248, + "grad_norm": 0.5793118660838545, + "learning_rate": 3.735022341412314e-05, + "loss": 0.352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1868707835674286, + "step": 780, + "valid_targets_mean": 2748.2, + "valid_targets_min": 1103 + }, + { + "epoch": 1.256, + "grad_norm": 0.49312789125401785, + "learning_rate": 3.7294381036472386e-05, + "loss": 0.3778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14733919501304626, + "step": 785, + "valid_targets_mean": 3287.6, + "valid_targets_min": 984 + }, + { + "epoch": 1.264, + "grad_norm": 0.41240222215312733, + "learning_rate": 3.723799900778538e-05, + "loss": 0.3789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.126112163066864, + "step": 790, + "valid_targets_mean": 3915.2, + "valid_targets_min": 767 + }, + { + "epoch": 1.272, + "grad_norm": 0.4390287545765749, + "learning_rate": 3.7181079087397705e-05, + "loss": 0.3514, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15731275081634521, + "step": 795, + "valid_targets_mean": 4579.5, + "valid_targets_min": 1110 + }, + { + "epoch": 1.28, + "grad_norm": 0.5857426000892533, + "learning_rate": 3.712362305142926e-05, + "loss": 0.382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2865251302719116, + "step": 800, + "valid_targets_mean": 4268.1, + "valid_targets_min": 903 + }, + { + "epoch": 1.288, + "grad_norm": 0.5522244993785961, + "learning_rate": 3.706563269272878e-05, + "loss": 0.4019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1548284888267517, + "step": 805, + "valid_targets_mean": 5253.2, + "valid_targets_min": 958 + }, + { + "epoch": 1.296, + "grad_norm": 0.49707542029017, + "learning_rate": 3.700710982081794e-05, + "loss": 0.3604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1068187803030014, + "step": 810, + "valid_targets_mean": 2315.8, + "valid_targets_min": 1005 + }, + { + "epoch": 1.304, + "grad_norm": 0.4597650989348005, + "learning_rate": 3.694805626183486e-05, + "loss": 0.3419, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1940535604953766, + "step": 815, + "valid_targets_mean": 6227.8, + "valid_targets_min": 1092 + }, + { + "epoch": 1.312, + "grad_norm": 0.4807446238755427, + "learning_rate": 3.688847385847711e-05, + "loss": 0.3648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2306816130876541, + "step": 820, + "valid_targets_mean": 4811.5, + "valid_targets_min": 888 + }, + { + "epoch": 1.32, + "grad_norm": 0.37954247976442135, + "learning_rate": 3.682836446994428e-05, + "loss": 0.355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11209504306316376, + "step": 825, + "valid_targets_mean": 4343.1, + "valid_targets_min": 554 + }, + { + "epoch": 1.328, + "grad_norm": 0.6968928370006656, + "learning_rate": 3.676772997187989e-05, + "loss": 0.4238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1730729341506958, + "step": 830, + "valid_targets_mean": 2439.0, + "valid_targets_min": 598 + }, + { + "epoch": 1.336, + "grad_norm": 0.5644540602579898, + "learning_rate": 3.670657225631289e-05, + "loss": 0.3816, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2261945605278015, + "step": 835, + "valid_targets_mean": 3144.8, + "valid_targets_min": 682 + }, + { + "epoch": 1.3439999999999999, + "grad_norm": 0.4518440551193136, + "learning_rate": 3.6644893231598635e-05, + "loss": 0.3949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13814306259155273, + "step": 840, + "valid_targets_mean": 4653.5, + "valid_targets_min": 595 + }, + { + "epoch": 1.3519999999999999, + "grad_norm": 0.5278282431174571, + "learning_rate": 3.658269482235932e-05, + "loss": 0.3715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25394973158836365, + "step": 845, + "valid_targets_mean": 3977.8, + "valid_targets_min": 1029 + }, + { + "epoch": 1.3599999999999999, + "grad_norm": 0.4752219437628507, + "learning_rate": 3.651997896942394e-05, + "loss": 0.3455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12458296865224838, + "step": 850, + "valid_targets_mean": 3616.1, + "valid_targets_min": 596 + }, + { + "epoch": 1.3679999999999999, + "grad_norm": 0.5099738101411846, + "learning_rate": 3.645674762976769e-05, + "loss": 0.3951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22577358782291412, + "step": 855, + "valid_targets_mean": 4207.6, + "valid_targets_min": 1149 + }, + { + "epoch": 1.376, + "grad_norm": 0.38904671121083256, + "learning_rate": 3.639300277645096e-05, + "loss": 0.358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13607583940029144, + "step": 860, + "valid_targets_mean": 4714.9, + "valid_targets_min": 1457 + }, + { + "epoch": 1.384, + "grad_norm": 0.3729523812901352, + "learning_rate": 3.6328746398557715e-05, + "loss": 0.3478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1809740662574768, + "step": 865, + "valid_targets_mean": 5782.2, + "valid_targets_min": 965 + }, + { + "epoch": 1.392, + "grad_norm": 1.3346441971185985, + "learning_rate": 3.6263980501133466e-05, + "loss": 0.3478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11153702437877655, + "step": 870, + "valid_targets_mean": 2711.6, + "valid_targets_min": 656 + }, + { + "epoch": 1.4, + "grad_norm": 0.4932114438780455, + "learning_rate": 3.619870710512268e-05, + "loss": 0.3879, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16475415229797363, + "step": 875, + "valid_targets_mean": 3808.2, + "valid_targets_min": 573 + }, + { + "epoch": 1.408, + "grad_norm": 0.42202752832274765, + "learning_rate": 3.6132928247305713e-05, + "loss": 0.3583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1160656213760376, + "step": 880, + "valid_targets_mean": 2949.6, + "valid_targets_min": 959 + }, + { + "epoch": 1.416, + "grad_norm": 0.45638707283657404, + "learning_rate": 3.60666459802353e-05, + "loss": 0.4137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1908377707004547, + "step": 885, + "valid_targets_mean": 4822.8, + "valid_targets_min": 1414 + }, + { + "epoch": 1.424, + "grad_norm": 0.49655080228821374, + "learning_rate": 3.599986237217245e-05, + "loss": 0.3884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14393456280231476, + "step": 890, + "valid_targets_mean": 3312.5, + "valid_targets_min": 1288 + }, + { + "epoch": 1.432, + "grad_norm": 0.40994947703418805, + "learning_rate": 3.593257950702194e-05, + "loss": 0.3821, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19887655973434448, + "step": 895, + "valid_targets_mean": 5122.5, + "valid_targets_min": 1689 + }, + { + "epoch": 1.44, + "grad_norm": 0.6100341519792659, + "learning_rate": 3.586479948426728e-05, + "loss": 0.4045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29838448762893677, + "step": 900, + "valid_targets_mean": 3684.9, + "valid_targets_min": 781 + }, + { + "epoch": 1.448, + "grad_norm": 0.40258620360925185, + "learning_rate": 3.579652441890523e-05, + "loss": 0.357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15237580239772797, + "step": 905, + "valid_targets_mean": 4839.9, + "valid_targets_min": 1123 + }, + { + "epoch": 1.456, + "grad_norm": 0.6002865400954618, + "learning_rate": 3.572775644137974e-05, + "loss": 0.3534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14805619418621063, + "step": 910, + "valid_targets_mean": 1668.4, + "valid_targets_min": 697 + }, + { + "epoch": 1.464, + "grad_norm": 0.34946528869800725, + "learning_rate": 3.5658497697515534e-05, + "loss": 0.3369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1722603142261505, + "step": 915, + "valid_targets_mean": 6944.6, + "valid_targets_min": 1284 + }, + { + "epoch": 1.472, + "grad_norm": 0.5550974904476521, + "learning_rate": 3.558875034845113e-05, + "loss": 0.3414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1764376014471054, + "step": 920, + "valid_targets_mean": 5085.6, + "valid_targets_min": 1302 + }, + { + "epoch": 1.48, + "grad_norm": 0.40211624001194296, + "learning_rate": 3.551851657057139e-05, + "loss": 0.3507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18077786266803741, + "step": 925, + "valid_targets_mean": 6009.4, + "valid_targets_min": 853 + }, + { + "epoch": 1.488, + "grad_norm": 0.42842599212123683, + "learning_rate": 3.544779855543963e-05, + "loss": 0.3435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21971535682678223, + "step": 930, + "valid_targets_mean": 5389.8, + "valid_targets_min": 1860 + }, + { + "epoch": 1.496, + "grad_norm": 0.5412809944985588, + "learning_rate": 3.5376598509729226e-05, + "loss": 0.3777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20583224296569824, + "step": 935, + "valid_targets_mean": 3356.8, + "valid_targets_min": 1087 + }, + { + "epoch": 1.504, + "grad_norm": 0.4090830832550061, + "learning_rate": 3.5304918655154754e-05, + "loss": 0.3964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21631677448749542, + "step": 940, + "valid_targets_mean": 5357.0, + "valid_targets_min": 1077 + }, + { + "epoch": 1.512, + "grad_norm": 0.6620976500410706, + "learning_rate": 3.523276122840266e-05, + "loss": 0.3548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2258632481098175, + "step": 945, + "valid_targets_mean": 2185.2, + "valid_targets_min": 671 + }, + { + "epoch": 1.52, + "grad_norm": 0.4976296422194412, + "learning_rate": 3.516012848106149e-05, + "loss": 0.3499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18777544796466827, + "step": 950, + "valid_targets_mean": 3989.5, + "valid_targets_min": 1180 + }, + { + "epoch": 1.528, + "grad_norm": 0.4230732174600183, + "learning_rate": 3.5087022679551614e-05, + "loss": 0.3575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19989222288131714, + "step": 955, + "valid_targets_mean": 4162.1, + "valid_targets_min": 1657 + }, + { + "epoch": 1.536, + "grad_norm": 0.5275885905547862, + "learning_rate": 3.5013446105054486e-05, + "loss": 0.356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17071036994457245, + "step": 960, + "valid_targets_mean": 2735.5, + "valid_targets_min": 907 + }, + { + "epoch": 1.544, + "grad_norm": 0.46078220045065427, + "learning_rate": 3.493940105344152e-05, + "loss": 0.3706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1477717161178589, + "step": 965, + "valid_targets_mean": 2956.9, + "valid_targets_min": 947 + }, + { + "epoch": 1.552, + "grad_norm": 0.3995299747785965, + "learning_rate": 3.4864889835202366e-05, + "loss": 0.3534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18769648671150208, + "step": 970, + "valid_targets_mean": 5725.6, + "valid_targets_min": 1999 + }, + { + "epoch": 1.56, + "grad_norm": 0.64944906147947, + "learning_rate": 3.4789914775372905e-05, + "loss": 0.3862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24089542031288147, + "step": 975, + "valid_targets_mean": 2843.6, + "valid_targets_min": 869 + }, + { + "epoch": 1.568, + "grad_norm": 0.5425019910583312, + "learning_rate": 3.471447821346264e-05, + "loss": 0.3922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15392085909843445, + "step": 980, + "valid_targets_mean": 2680.6, + "valid_targets_min": 908 + }, + { + "epoch": 1.576, + "grad_norm": 0.5368209449228799, + "learning_rate": 3.463858250338168e-05, + "loss": 0.396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1622292697429657, + "step": 985, + "valid_targets_mean": 2758.0, + "valid_targets_min": 869 + }, + { + "epoch": 1.584, + "grad_norm": 0.43598303845384095, + "learning_rate": 3.4562230013367374e-05, + "loss": 0.4045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21002206206321716, + "step": 990, + "valid_targets_mean": 4658.1, + "valid_targets_min": 1457 + }, + { + "epoch": 1.592, + "grad_norm": 0.3904901071899063, + "learning_rate": 3.448542312591032e-05, + "loss": 0.37, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20996427536010742, + "step": 995, + "valid_targets_mean": 6088.1, + "valid_targets_min": 688 + }, + { + "epoch": 1.6, + "grad_norm": 0.41487996792002385, + "learning_rate": 3.440816423768007e-05, + "loss": 0.3465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2232140749692917, + "step": 1000, + "valid_targets_mean": 6249.6, + "valid_targets_min": 790 + }, + { + "epoch": 1.608, + "grad_norm": 0.41215706864500073, + "learning_rate": 3.433045575945031e-05, + "loss": 0.3747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1780746430158615, + "step": 1005, + "valid_targets_mean": 6710.1, + "valid_targets_min": 720 + }, + { + "epoch": 1.616, + "grad_norm": 0.5427836394723989, + "learning_rate": 3.42523001160237e-05, + "loss": 0.4069, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22656163573265076, + "step": 1010, + "valid_targets_mean": 3230.6, + "valid_targets_min": 1009 + }, + { + "epoch": 1.624, + "grad_norm": 0.4790601799537503, + "learning_rate": 3.417369974615615e-05, + "loss": 0.3731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1649462878704071, + "step": 1015, + "valid_targets_mean": 4221.9, + "valid_targets_min": 1007 + }, + { + "epoch": 1.6320000000000001, + "grad_norm": 0.4765491643369093, + "learning_rate": 3.409465710248074e-05, + "loss": 0.3515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14106625318527222, + "step": 1020, + "valid_targets_mean": 2703.8, + "valid_targets_min": 1020 + }, + { + "epoch": 1.6400000000000001, + "grad_norm": 0.49781135493555667, + "learning_rate": 3.401517465143119e-05, + "loss": 0.3895, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21954402327537537, + "step": 1025, + "valid_targets_mean": 4009.2, + "valid_targets_min": 1460 + }, + { + "epoch": 1.6480000000000001, + "grad_norm": 0.45112165406620686, + "learning_rate": 3.393525487316489e-05, + "loss": 0.3614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18274998664855957, + "step": 1030, + "valid_targets_mean": 4840.9, + "valid_targets_min": 1708 + }, + { + "epoch": 1.6560000000000001, + "grad_norm": 0.4693212042872953, + "learning_rate": 3.385490026148554e-05, + "loss": 0.4153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1388929784297943, + "step": 1035, + "valid_targets_mean": 3562.4, + "valid_targets_min": 625 + }, + { + "epoch": 1.6640000000000001, + "grad_norm": 0.37397480223788876, + "learning_rate": 3.377411332376529e-05, + "loss": 0.3642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2300184965133667, + "step": 1040, + "valid_targets_mean": 7929.9, + "valid_targets_min": 994 + }, + { + "epoch": 1.6720000000000002, + "grad_norm": 0.3841145063100303, + "learning_rate": 3.369289658086651e-05, + "loss": 0.3544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13292424380779266, + "step": 1045, + "valid_targets_mean": 3923.2, + "valid_targets_min": 659 + }, + { + "epoch": 1.6800000000000002, + "grad_norm": 0.37819794098936316, + "learning_rate": 3.3611252567063184e-05, + "loss": 0.36, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19257867336273193, + "step": 1050, + "valid_targets_mean": 7234.4, + "valid_targets_min": 1669 + }, + { + "epoch": 1.688, + "grad_norm": 0.4307477074202791, + "learning_rate": 3.352918382996174e-05, + "loss": 0.354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19784092903137207, + "step": 1055, + "valid_targets_mean": 4706.8, + "valid_targets_min": 1256 + }, + { + "epoch": 1.696, + "grad_norm": 0.47999337411149967, + "learning_rate": 3.344669293042163e-05, + "loss": 0.3863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19967739284038544, + "step": 1060, + "valid_targets_mean": 3586.5, + "valid_targets_min": 1024 + }, + { + "epoch": 1.704, + "grad_norm": 0.4958715076592039, + "learning_rate": 3.336378244247539e-05, + "loss": 0.3851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25138044357299805, + "step": 1065, + "valid_targets_mean": 4725.0, + "valid_targets_min": 877 + }, + { + "epoch": 1.712, + "grad_norm": 0.44982531036880435, + "learning_rate": 3.3280454953248326e-05, + "loss": 0.3318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18653461337089539, + "step": 1070, + "valid_targets_mean": 4260.8, + "valid_targets_min": 1106 + }, + { + "epoch": 1.72, + "grad_norm": 0.4523598014038419, + "learning_rate": 3.3196713062877765e-05, + "loss": 0.3524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14612746238708496, + "step": 1075, + "valid_targets_mean": 4109.9, + "valid_targets_min": 896 + }, + { + "epoch": 1.728, + "grad_norm": 0.40758165227767357, + "learning_rate": 3.311255938443196e-05, + "loss": 0.3723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13389909267425537, + "step": 1080, + "valid_targets_mean": 4376.0, + "valid_targets_min": 950 + }, + { + "epoch": 1.736, + "grad_norm": 0.5491124893709626, + "learning_rate": 3.3027996543828524e-05, + "loss": 0.3695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18468721210956573, + "step": 1085, + "valid_targets_mean": 2868.6, + "valid_targets_min": 849 + }, + { + "epoch": 1.744, + "grad_norm": 0.3993174672092302, + "learning_rate": 3.2943027179752494e-05, + "loss": 0.3416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20559825003147125, + "step": 1090, + "valid_targets_mean": 6660.8, + "valid_targets_min": 760 + }, + { + "epoch": 1.752, + "grad_norm": 0.40380661411621366, + "learning_rate": 3.285765394357401e-05, + "loss": 0.3312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15859174728393555, + "step": 1095, + "valid_targets_mean": 4548.8, + "valid_targets_min": 829 + }, + { + "epoch": 1.76, + "grad_norm": 0.5382918290555533, + "learning_rate": 3.277187949926556e-05, + "loss": 0.3523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21806976199150085, + "step": 1100, + "valid_targets_mean": 3312.4, + "valid_targets_min": 1025 + }, + { + "epoch": 1.768, + "grad_norm": 0.4145669760422594, + "learning_rate": 3.268570652331888e-05, + "loss": 0.3984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1447543501853943, + "step": 1105, + "valid_targets_mean": 3821.0, + "valid_targets_min": 722 + }, + { + "epoch": 1.776, + "grad_norm": 0.6163574474026122, + "learning_rate": 3.2599137704661405e-05, + "loss": 0.3596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21196293830871582, + "step": 1110, + "valid_targets_mean": 2316.6, + "valid_targets_min": 586 + }, + { + "epoch": 1.784, + "grad_norm": 0.47460067439094217, + "learning_rate": 3.251217574457239e-05, + "loss": 0.3742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2540559768676758, + "step": 1115, + "valid_targets_mean": 4955.0, + "valid_targets_min": 1701 + }, + { + "epoch": 1.792, + "grad_norm": 0.4624184685618981, + "learning_rate": 3.242482335659861e-05, + "loss": 0.3834, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18782807886600494, + "step": 1120, + "valid_targets_mean": 4641.6, + "valid_targets_min": 785 + }, + { + "epoch": 1.8, + "grad_norm": 0.49672506088763413, + "learning_rate": 3.2337083266469687e-05, + "loss": 0.3983, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24676811695098877, + "step": 1125, + "valid_targets_mean": 4641.5, + "valid_targets_min": 803 + }, + { + "epoch": 1.808, + "grad_norm": 0.4252619396562025, + "learning_rate": 3.224895821201304e-05, + "loss": 0.3789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2329331487417221, + "step": 1130, + "valid_targets_mean": 5197.2, + "valid_targets_min": 760 + }, + { + "epoch": 1.8159999999999998, + "grad_norm": 0.4185531683126807, + "learning_rate": 3.2160450943068446e-05, + "loss": 0.3662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16280978918075562, + "step": 1135, + "valid_targets_mean": 5254.2, + "valid_targets_min": 2000 + }, + { + "epoch": 1.8239999999999998, + "grad_norm": 0.696452870548893, + "learning_rate": 3.207156422140225e-05, + "loss": 0.4045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19130727648735046, + "step": 1140, + "valid_targets_mean": 1899.5, + "valid_targets_min": 771 + }, + { + "epoch": 1.8319999999999999, + "grad_norm": 0.48431295913248046, + "learning_rate": 3.198230082062115e-05, + "loss": 0.3836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2074839472770691, + "step": 1145, + "valid_targets_mean": 5449.5, + "valid_targets_min": 852 + }, + { + "epoch": 1.8399999999999999, + "grad_norm": 0.4133281290019396, + "learning_rate": 3.189266352608574e-05, + "loss": 0.366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1883474886417389, + "step": 1150, + "valid_targets_mean": 4908.2, + "valid_targets_min": 1127 + }, + { + "epoch": 1.8479999999999999, + "grad_norm": 0.40434980197549936, + "learning_rate": 3.180265513482345e-05, + "loss": 0.3366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15408740937709808, + "step": 1155, + "valid_targets_mean": 4307.4, + "valid_targets_min": 892 + }, + { + "epoch": 1.8559999999999999, + "grad_norm": 0.44482757502979986, + "learning_rate": 3.171227845544143e-05, + "loss": 0.3676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2276269793510437, + "step": 1160, + "valid_targets_mean": 4911.0, + "valid_targets_min": 957 + }, + { + "epoch": 1.8639999999999999, + "grad_norm": 0.43600977689095316, + "learning_rate": 3.162153630803877e-05, + "loss": 0.3542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15386472642421722, + "step": 1165, + "valid_targets_mean": 4215.6, + "valid_targets_min": 1328 + }, + { + "epoch": 1.8719999999999999, + "grad_norm": 0.37429615582479875, + "learning_rate": 3.153043152411861e-05, + "loss": 0.3945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1603434681892395, + "step": 1170, + "valid_targets_mean": 4956.5, + "valid_targets_min": 1233 + }, + { + "epoch": 1.88, + "grad_norm": 0.42018805399081294, + "learning_rate": 3.14389669464997e-05, + "loss": 0.367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13551610708236694, + "step": 1175, + "valid_targets_mean": 3497.2, + "valid_targets_min": 699 + }, + { + "epoch": 1.888, + "grad_norm": 0.4485191669176861, + "learning_rate": 3.134714542922777e-05, + "loss": 0.3694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1404634714126587, + "step": 1180, + "valid_targets_mean": 3324.0, + "valid_targets_min": 857 + }, + { + "epoch": 1.896, + "grad_norm": 0.5320471292171671, + "learning_rate": 3.1254969837486425e-05, + "loss": 0.3528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22733406722545624, + "step": 1185, + "valid_targets_mean": 3494.6, + "valid_targets_min": 1049 + }, + { + "epoch": 1.904, + "grad_norm": 0.4429069138743051, + "learning_rate": 3.116244304750774e-05, + "loss": 0.3441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21461255848407745, + "step": 1190, + "valid_targets_mean": 5988.0, + "valid_targets_min": 1106 + }, + { + "epoch": 1.912, + "grad_norm": 0.4909918363267137, + "learning_rate": 3.106956794648254e-05, + "loss": 0.3888, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3207101821899414, + "step": 1195, + "valid_targets_mean": 5188.9, + "valid_targets_min": 1740 + }, + { + "epoch": 1.92, + "grad_norm": 0.4380391893049689, + "learning_rate": 3.097634743247026e-05, + "loss": 0.3777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20924848318099976, + "step": 1200, + "valid_targets_mean": 5444.6, + "valid_targets_min": 859 + }, + { + "epoch": 1.928, + "grad_norm": 0.39395020883135107, + "learning_rate": 3.08827844143086e-05, + "loss": 0.3606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1892511397600174, + "step": 1205, + "valid_targets_mean": 4884.9, + "valid_targets_min": 1116 + }, + { + "epoch": 1.936, + "grad_norm": 0.4431767573115503, + "learning_rate": 3.078888181152264e-05, + "loss": 0.3794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12274126708507538, + "step": 1210, + "valid_targets_mean": 2207.4, + "valid_targets_min": 565 + }, + { + "epoch": 1.944, + "grad_norm": 0.4393189555489097, + "learning_rate": 3.0694642554233855e-05, + "loss": 0.3684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1852644830942154, + "step": 1215, + "valid_targets_mean": 3580.2, + "valid_targets_min": 847 + }, + { + "epoch": 1.952, + "grad_norm": 0.5175461857648035, + "learning_rate": 3.0600069583068594e-05, + "loss": 0.3874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13556857407093048, + "step": 1220, + "valid_targets_mean": 2068.8, + "valid_targets_min": 911 + }, + { + "epoch": 1.96, + "grad_norm": 0.4344897474377549, + "learning_rate": 3.0505165849066394e-05, + "loss": 0.3416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16121706366539001, + "step": 1225, + "valid_targets_mean": 4083.2, + "valid_targets_min": 1160 + }, + { + "epoch": 1.968, + "grad_norm": 0.3998726023920137, + "learning_rate": 3.040993431358782e-05, + "loss": 0.3748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20173384249210358, + "step": 1230, + "valid_targets_mean": 5485.5, + "valid_targets_min": 795 + }, + { + "epoch": 1.976, + "grad_norm": 0.43530746416898686, + "learning_rate": 3.031437794822215e-05, + "loss": 0.3352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1987071931362152, + "step": 1235, + "valid_targets_mean": 4561.8, + "valid_targets_min": 737 + }, + { + "epoch": 1.984, + "grad_norm": 0.45516788943319536, + "learning_rate": 3.021849973469455e-05, + "loss": 0.3869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18358221650123596, + "step": 1240, + "valid_targets_mean": 3207.0, + "valid_targets_min": 998 + }, + { + "epoch": 1.992, + "grad_norm": 0.4576579120788942, + "learning_rate": 3.012230266477313e-05, + "loss": 0.3758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11642518639564514, + "step": 1245, + "valid_targets_mean": 2846.4, + "valid_targets_min": 645 + }, + { + "epoch": 2.0, + "grad_norm": 0.41687844040354605, + "learning_rate": 3.0025789740175502e-05, + "loss": 0.3621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22766928374767303, + "step": 1250, + "valid_targets_mean": 5502.0, + "valid_targets_min": 804 + }, + { + "epoch": 2.008, + "grad_norm": 0.39445917384438633, + "learning_rate": 2.9928963972475186e-05, + "loss": 0.3294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18963614106178284, + "step": 1255, + "valid_targets_mean": 6827.1, + "valid_targets_min": 716 + }, + { + "epoch": 2.016, + "grad_norm": 0.47795453215109707, + "learning_rate": 2.9831828383007585e-05, + "loss": 0.3477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16897451877593994, + "step": 1260, + "valid_targets_mean": 3771.0, + "valid_targets_min": 1365 + }, + { + "epoch": 2.024, + "grad_norm": 0.40551985391746687, + "learning_rate": 2.9734386002775754e-05, + "loss": 0.3464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11677554249763489, + "step": 1265, + "valid_targets_mean": 4102.6, + "valid_targets_min": 621 + }, + { + "epoch": 2.032, + "grad_norm": 0.42377235398466956, + "learning_rate": 2.963663987235577e-05, + "loss": 0.3505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1520041525363922, + "step": 1270, + "valid_targets_mean": 5001.9, + "valid_targets_min": 721 + }, + { + "epoch": 2.04, + "grad_norm": 0.4806596558991801, + "learning_rate": 2.95385930418019e-05, + "loss": 0.3668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18150800466537476, + "step": 1275, + "valid_targets_mean": 3979.8, + "valid_targets_min": 734 + }, + { + "epoch": 2.048, + "grad_norm": 0.45923619311529357, + "learning_rate": 2.9440248570551406e-05, + "loss": 0.3577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17516975104808807, + "step": 1280, + "valid_targets_mean": 4615.6, + "valid_targets_min": 732 + }, + { + "epoch": 2.056, + "grad_norm": 0.43037695871723236, + "learning_rate": 2.934160952732907e-05, + "loss": 0.3065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1621362715959549, + "step": 1285, + "valid_targets_mean": 4676.5, + "valid_targets_min": 1619 + }, + { + "epoch": 2.064, + "grad_norm": 0.5278235222224164, + "learning_rate": 2.9242678990051462e-05, + "loss": 0.3165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12950673699378967, + "step": 1290, + "valid_targets_mean": 3176.2, + "valid_targets_min": 820 + }, + { + "epoch": 2.072, + "grad_norm": 0.591249312950412, + "learning_rate": 2.9143460045730886e-05, + "loss": 0.3165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25125831365585327, + "step": 1295, + "valid_targets_mean": 4605.6, + "valid_targets_min": 1051 + }, + { + "epoch": 2.08, + "grad_norm": 0.4633677094496606, + "learning_rate": 2.9043955790379035e-05, + "loss": 0.3412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15374936163425446, + "step": 1300, + "valid_targets_mean": 3456.1, + "valid_targets_min": 596 + }, + { + "epoch": 2.088, + "grad_norm": 0.6839077748868694, + "learning_rate": 2.8944169328910427e-05, + "loss": 0.3314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21061047911643982, + "step": 1305, + "valid_targets_mean": 2743.4, + "valid_targets_min": 736 + }, + { + "epoch": 2.096, + "grad_norm": 0.40010179262506984, + "learning_rate": 2.884410377504547e-05, + "loss": 0.3489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20529218018054962, + "step": 1310, + "valid_targets_mean": 5945.4, + "valid_targets_min": 1603 + }, + { + "epoch": 2.104, + "grad_norm": 0.46363378675791567, + "learning_rate": 2.8743762251213333e-05, + "loss": 0.3516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13235880434513092, + "step": 1315, + "valid_targets_mean": 3557.6, + "valid_targets_min": 909 + }, + { + "epoch": 2.112, + "grad_norm": 0.5226657341187503, + "learning_rate": 2.8643147888454507e-05, + "loss": 0.321, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17466039955615997, + "step": 1320, + "valid_targets_mean": 5333.5, + "valid_targets_min": 948 + }, + { + "epoch": 2.12, + "grad_norm": 0.5688435709083185, + "learning_rate": 2.854226382632312e-05, + "loss": 0.3687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19334635138511658, + "step": 1325, + "valid_targets_mean": 3006.2, + "valid_targets_min": 607 + }, + { + "epoch": 2.128, + "grad_norm": 0.3938353360765779, + "learning_rate": 2.844111321278893e-05, + "loss": 0.3356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18020674586296082, + "step": 1330, + "valid_targets_mean": 7703.2, + "valid_targets_min": 2069 + }, + { + "epoch": 2.136, + "grad_norm": 0.6166572236379344, + "learning_rate": 2.833969920413913e-05, + "loss": 0.3654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15063633024692535, + "step": 1335, + "valid_targets_mean": 2484.9, + "valid_targets_min": 588 + }, + { + "epoch": 2.144, + "grad_norm": 0.4594292950880163, + "learning_rate": 2.8238024964879857e-05, + "loss": 0.3588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1551552265882492, + "step": 1340, + "valid_targets_mean": 4537.5, + "valid_targets_min": 1203 + }, + { + "epoch": 2.152, + "grad_norm": 0.4759453483875312, + "learning_rate": 2.8136093667637438e-05, + "loss": 0.3608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19271422922611237, + "step": 1345, + "valid_targets_mean": 5253.5, + "valid_targets_min": 634 + }, + { + "epoch": 2.16, + "grad_norm": 0.4274589956773415, + "learning_rate": 2.8033908493059394e-05, + "loss": 0.3447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26053884625434875, + "step": 1350, + "valid_targets_mean": 7490.9, + "valid_targets_min": 696 + }, + { + "epoch": 2.168, + "grad_norm": 0.48920586887692913, + "learning_rate": 2.793147262971519e-05, + "loss": 0.3384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20303943753242493, + "step": 1355, + "valid_targets_mean": 3912.5, + "valid_targets_min": 1093 + }, + { + "epoch": 2.176, + "grad_norm": 0.3805101077019346, + "learning_rate": 2.7828789273996748e-05, + "loss": 0.3513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1607498824596405, + "step": 1360, + "valid_targets_mean": 7038.1, + "valid_targets_min": 950 + }, + { + "epoch": 2.184, + "grad_norm": 0.4436216784104909, + "learning_rate": 2.7725861630018703e-05, + "loss": 0.3757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24152888357639313, + "step": 1365, + "valid_targets_mean": 6961.2, + "valid_targets_min": 2951 + }, + { + "epoch": 2.192, + "grad_norm": 0.49361804115715835, + "learning_rate": 2.7622692909518423e-05, + "loss": 0.3367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1655840426683426, + "step": 1370, + "valid_targets_mean": 3099.6, + "valid_targets_min": 776 + }, + { + "epoch": 2.2, + "grad_norm": 0.3908270372770131, + "learning_rate": 2.7519286331755766e-05, + "loss": 0.3292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20059353113174438, + "step": 1375, + "valid_targets_mean": 6875.1, + "valid_targets_min": 863 + }, + { + "epoch": 2.208, + "grad_norm": 0.3381814363001518, + "learning_rate": 2.7415645123412672e-05, + "loss": 0.3038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14437264204025269, + "step": 1380, + "valid_targets_mean": 5681.9, + "valid_targets_min": 1259 + }, + { + "epoch": 2.216, + "grad_norm": 0.4335970734003149, + "learning_rate": 2.731177251849246e-05, + "loss": 0.3905, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12784774601459503, + "step": 1385, + "valid_targets_mean": 3582.9, + "valid_targets_min": 1328 + }, + { + "epoch": 2.224, + "grad_norm": 0.5013659443395456, + "learning_rate": 2.7207671758218884e-05, + "loss": 0.324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20887424051761627, + "step": 1390, + "valid_targets_mean": 3934.0, + "valid_targets_min": 1019 + }, + { + "epoch": 2.232, + "grad_norm": 0.5451971071443736, + "learning_rate": 2.710334609093504e-05, + "loss": 0.3229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12654241919517517, + "step": 1395, + "valid_targets_mean": 2552.6, + "valid_targets_min": 1080 + }, + { + "epoch": 2.24, + "grad_norm": 0.5275185534002302, + "learning_rate": 2.699879877200198e-05, + "loss": 0.3476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17860426008701324, + "step": 1400, + "valid_targets_mean": 4925.5, + "valid_targets_min": 1138 + }, + { + "epoch": 2.248, + "grad_norm": 0.4750860893446014, + "learning_rate": 2.6894033063697143e-05, + "loss": 0.3167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16186845302581787, + "step": 1405, + "valid_targets_mean": 3735.0, + "valid_targets_min": 484 + }, + { + "epoch": 2.2560000000000002, + "grad_norm": 0.5373568486267774, + "learning_rate": 2.6789052235112554e-05, + "loss": 0.3537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22873114049434662, + "step": 1410, + "valid_targets_mean": 4564.0, + "valid_targets_min": 632 + }, + { + "epoch": 2.2640000000000002, + "grad_norm": 0.5788908168658097, + "learning_rate": 2.66838595620528e-05, + "loss": 0.3401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2733263075351715, + "step": 1415, + "valid_targets_mean": 4478.9, + "valid_targets_min": 673 + }, + { + "epoch": 2.2720000000000002, + "grad_norm": 0.586889266057967, + "learning_rate": 2.6578458326932842e-05, + "loss": 0.3683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17043578624725342, + "step": 1420, + "valid_targets_mean": 2696.2, + "valid_targets_min": 811 + }, + { + "epoch": 2.2800000000000002, + "grad_norm": 0.493980927370087, + "learning_rate": 2.6472851818675583e-05, + "loss": 0.3391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2201269567012787, + "step": 1425, + "valid_targets_mean": 4983.6, + "valid_targets_min": 810 + }, + { + "epoch": 2.288, + "grad_norm": 0.3286261942090746, + "learning_rate": 2.6367043332609223e-05, + "loss": 0.3392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13666635751724243, + "step": 1430, + "valid_targets_mean": 7580.5, + "valid_targets_min": 1536 + }, + { + "epoch": 2.296, + "grad_norm": 0.4699948466545275, + "learning_rate": 2.6261036170364448e-05, + "loss": 0.3476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1846490055322647, + "step": 1435, + "valid_targets_mean": 3870.8, + "valid_targets_min": 660 + }, + { + "epoch": 2.304, + "grad_norm": 0.6157253523596955, + "learning_rate": 2.6154833639771415e-05, + "loss": 0.362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22415241599082947, + "step": 1440, + "valid_targets_mean": 2897.1, + "valid_targets_min": 1086 + }, + { + "epoch": 2.312, + "grad_norm": 0.45969123667163286, + "learning_rate": 2.6048439054756492e-05, + "loss": 0.3217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17089970409870148, + "step": 1445, + "valid_targets_mean": 4007.8, + "valid_targets_min": 905 + }, + { + "epoch": 2.32, + "grad_norm": 0.4423705287499431, + "learning_rate": 2.594185573523892e-05, + "loss": 0.3455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1676362156867981, + "step": 1450, + "valid_targets_mean": 8078.1, + "valid_targets_min": 969 + }, + { + "epoch": 2.328, + "grad_norm": 0.5054530871166943, + "learning_rate": 2.583508700702716e-05, + "loss": 0.3509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13996833562850952, + "step": 1455, + "valid_targets_mean": 3282.5, + "valid_targets_min": 864 + }, + { + "epoch": 2.336, + "grad_norm": 0.4468904131293525, + "learning_rate": 2.572813620171513e-05, + "loss": 0.3255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1576099395751953, + "step": 1460, + "valid_targets_mean": 4571.8, + "valid_targets_min": 1272 + }, + { + "epoch": 2.344, + "grad_norm": 0.4624486850943764, + "learning_rate": 2.5621006656578267e-05, + "loss": 0.3153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15907934308052063, + "step": 1465, + "valid_targets_mean": 4421.0, + "valid_targets_min": 982 + }, + { + "epoch": 2.352, + "grad_norm": 0.4307528801295405, + "learning_rate": 2.5513701714469373e-05, + "loss": 0.3714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1317245364189148, + "step": 1470, + "valid_targets_mean": 3624.6, + "valid_targets_min": 854 + }, + { + "epoch": 2.36, + "grad_norm": 0.5446119589065747, + "learning_rate": 2.540622472371429e-05, + "loss": 0.3409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1581343561410904, + "step": 1475, + "valid_targets_mean": 3219.8, + "valid_targets_min": 535 + }, + { + "epoch": 2.368, + "grad_norm": 0.7703262680045895, + "learning_rate": 2.5298579038007478e-05, + "loss": 0.351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18610429763793945, + "step": 1480, + "valid_targets_mean": 3620.9, + "valid_targets_min": 1339 + }, + { + "epoch": 2.376, + "grad_norm": 0.4079087647088371, + "learning_rate": 2.519076801630727e-05, + "loss": 0.3062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14258792996406555, + "step": 1485, + "valid_targets_mean": 5352.1, + "valid_targets_min": 1055 + }, + { + "epoch": 2.384, + "grad_norm": 0.514685400749292, + "learning_rate": 2.508279502273117e-05, + "loss": 0.335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20326395332813263, + "step": 1490, + "valid_targets_mean": 4564.5, + "valid_targets_min": 974 + }, + { + "epoch": 2.392, + "grad_norm": 0.5844991616700916, + "learning_rate": 2.4974663426450798e-05, + "loss": 0.3599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2439168244600296, + "step": 1495, + "valid_targets_mean": 3669.2, + "valid_targets_min": 1138 + }, + { + "epoch": 2.4, + "grad_norm": 0.4534268106459526, + "learning_rate": 2.4866376601586798e-05, + "loss": 0.3367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14468619227409363, + "step": 1500, + "valid_targets_mean": 4236.1, + "valid_targets_min": 337 + }, + { + "epoch": 2.408, + "grad_norm": 0.37149672658326494, + "learning_rate": 2.475793792710352e-05, + "loss": 0.3079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15612857043743134, + "step": 1505, + "valid_targets_mean": 5706.2, + "valid_targets_min": 1033 + }, + { + "epoch": 2.416, + "grad_norm": 0.431204790605149, + "learning_rate": 2.4649350786703637e-05, + "loss": 0.3472, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15120291709899902, + "step": 1510, + "valid_targets_mean": 3504.2, + "valid_targets_min": 537 + }, + { + "epoch": 2.424, + "grad_norm": 0.38058066092756215, + "learning_rate": 2.45406185687225e-05, + "loss": 0.3334, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2249833345413208, + "step": 1515, + "valid_targets_mean": 7466.4, + "valid_targets_min": 877 + }, + { + "epoch": 2.432, + "grad_norm": 0.4958529733126483, + "learning_rate": 2.443174466602246e-05, + "loss": 0.3362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18912896513938904, + "step": 1520, + "valid_targets_mean": 3740.2, + "valid_targets_min": 1103 + }, + { + "epoch": 2.44, + "grad_norm": 0.44552157761164163, + "learning_rate": 2.4322732475886953e-05, + "loss": 0.3425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.196616530418396, + "step": 1525, + "valid_targets_mean": 5088.4, + "valid_targets_min": 1383 + }, + { + "epoch": 2.448, + "grad_norm": 0.5732608370374113, + "learning_rate": 2.4213585399914528e-05, + "loss": 0.3386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.261408269405365, + "step": 1530, + "valid_targets_mean": 5127.4, + "valid_targets_min": 914 + }, + { + "epoch": 2.456, + "grad_norm": 0.4646397657685872, + "learning_rate": 2.4104306843912687e-05, + "loss": 0.3481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19430388510227203, + "step": 1535, + "valid_targets_mean": 4647.1, + "valid_targets_min": 841 + }, + { + "epoch": 2.464, + "grad_norm": 0.44219115003302095, + "learning_rate": 2.3994900217791615e-05, + "loss": 0.3248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14268314838409424, + "step": 1540, + "valid_targets_mean": 3976.5, + "valid_targets_min": 697 + }, + { + "epoch": 2.472, + "grad_norm": 0.4933434455561328, + "learning_rate": 2.3885368935457762e-05, + "loss": 0.3596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16441547870635986, + "step": 1545, + "valid_targets_mean": 3575.2, + "valid_targets_min": 823 + }, + { + "epoch": 2.48, + "grad_norm": 0.4326667267917598, + "learning_rate": 2.3775716414707355e-05, + "loss": 0.328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1232375055551529, + "step": 1550, + "valid_targets_mean": 3318.1, + "valid_targets_min": 1079 + }, + { + "epoch": 2.488, + "grad_norm": 0.429583180648258, + "learning_rate": 2.36659460771197e-05, + "loss": 0.3812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07942848652601242, + "step": 1555, + "valid_targets_mean": 2053.8, + "valid_targets_min": 653 + }, + { + "epoch": 2.496, + "grad_norm": 0.5473918418436651, + "learning_rate": 2.3556061347950455e-05, + "loss": 0.3418, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20061814785003662, + "step": 1560, + "valid_targets_mean": 3775.9, + "valid_targets_min": 1127 + }, + { + "epoch": 2.504, + "grad_norm": 0.6024361371215083, + "learning_rate": 2.3446065656024734e-05, + "loss": 0.3522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21523049473762512, + "step": 1565, + "valid_targets_mean": 2751.6, + "valid_targets_min": 708 + }, + { + "epoch": 2.512, + "grad_norm": 0.5006491647087972, + "learning_rate": 2.33359624336301e-05, + "loss": 0.3123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09982918202877045, + "step": 1570, + "valid_targets_mean": 1937.8, + "valid_targets_min": 1018 + }, + { + "epoch": 2.52, + "grad_norm": 0.36371194764317616, + "learning_rate": 2.3225755116409497e-05, + "loss": 0.3093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17913754284381866, + "step": 1575, + "valid_targets_mean": 7189.9, + "valid_targets_min": 686 + }, + { + "epoch": 2.528, + "grad_norm": 0.5048673985779927, + "learning_rate": 2.311544714325403e-05, + "loss": 0.3441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1666538417339325, + "step": 1580, + "valid_targets_mean": 3370.5, + "valid_targets_min": 819 + }, + { + "epoch": 2.536, + "grad_norm": 0.5182580499328722, + "learning_rate": 2.300504195619563e-05, + "loss": 0.3615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2155158817768097, + "step": 1585, + "valid_targets_mean": 4026.4, + "valid_targets_min": 695 + }, + { + "epoch": 2.544, + "grad_norm": 0.468514133638622, + "learning_rate": 2.2894543000299697e-05, + "loss": 0.3323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24576425552368164, + "step": 1590, + "valid_targets_mean": 5894.2, + "valid_targets_min": 942 + }, + { + "epoch": 2.552, + "grad_norm": 0.4884711618253095, + "learning_rate": 2.2783953723557572e-05, + "loss": 0.342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16744929552078247, + "step": 1595, + "valid_targets_mean": 3803.2, + "valid_targets_min": 927 + }, + { + "epoch": 2.56, + "grad_norm": 0.5999751684945661, + "learning_rate": 2.2673277576778946e-05, + "loss": 0.3547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21919560432434082, + "step": 1600, + "valid_targets_mean": 2878.8, + "valid_targets_min": 994 + }, + { + "epoch": 2.568, + "grad_norm": 0.5423569314618256, + "learning_rate": 2.2562518013484208e-05, + "loss": 0.3769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24968698620796204, + "step": 1605, + "valid_targets_mean": 3738.9, + "valid_targets_min": 887 + }, + { + "epoch": 2.576, + "grad_norm": 0.5562360071258139, + "learning_rate": 2.245167848979664e-05, + "loss": 0.3489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18698713183403015, + "step": 1610, + "valid_targets_mean": 2892.8, + "valid_targets_min": 1438 + }, + { + "epoch": 2.584, + "grad_norm": 0.5326819387179283, + "learning_rate": 2.23407624643346e-05, + "loss": 0.3371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16616696119308472, + "step": 1615, + "valid_targets_mean": 3816.0, + "valid_targets_min": 906 + }, + { + "epoch": 2.592, + "grad_norm": 0.49126894683922456, + "learning_rate": 2.2229773398103606e-05, + "loss": 0.3507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16989147663116455, + "step": 1620, + "valid_targets_mean": 3596.0, + "valid_targets_min": 937 + }, + { + "epoch": 2.6, + "grad_norm": 0.41955570335468, + "learning_rate": 2.2118714754388323e-05, + "loss": 0.3514, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17922857403755188, + "step": 1625, + "valid_targets_mean": 5167.9, + "valid_targets_min": 1261 + }, + { + "epoch": 2.608, + "grad_norm": 0.4466338468560662, + "learning_rate": 2.200758999864449e-05, + "loss": 0.3404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23674200475215912, + "step": 1630, + "valid_targets_mean": 6806.6, + "valid_targets_min": 1415 + }, + { + "epoch": 2.616, + "grad_norm": 0.42959952004699986, + "learning_rate": 2.1896402598390818e-05, + "loss": 0.3535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18198445439338684, + "step": 1635, + "valid_targets_mean": 4071.1, + "valid_targets_min": 948 + }, + { + "epoch": 2.624, + "grad_norm": 0.6276829545612954, + "learning_rate": 2.178515602310074e-05, + "loss": 0.3686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3186021149158478, + "step": 1640, + "valid_targets_mean": 4211.4, + "valid_targets_min": 1217 + }, + { + "epoch": 2.632, + "grad_norm": 0.4505510976461976, + "learning_rate": 2.1673853744094193e-05, + "loss": 0.3974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29543089866638184, + "step": 1645, + "valid_targets_mean": 7278.0, + "valid_targets_min": 1524 + }, + { + "epoch": 2.64, + "grad_norm": 0.37719949029802746, + "learning_rate": 2.1562499234429283e-05, + "loss": 0.3246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15694212913513184, + "step": 1650, + "valid_targets_mean": 6930.1, + "valid_targets_min": 1011 + }, + { + "epoch": 2.648, + "grad_norm": 0.46205859204161104, + "learning_rate": 2.1451095968793908e-05, + "loss": 0.339, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.149054616689682, + "step": 1655, + "valid_targets_mean": 3161.5, + "valid_targets_min": 965 + }, + { + "epoch": 2.656, + "grad_norm": 0.5109521277662005, + "learning_rate": 2.1339647423397337e-05, + "loss": 0.3685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1951621025800705, + "step": 1660, + "valid_targets_mean": 4541.2, + "valid_targets_min": 775 + }, + { + "epoch": 2.664, + "grad_norm": 0.5496498798895945, + "learning_rate": 2.122815707586176e-05, + "loss": 0.3509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19943974912166595, + "step": 1665, + "valid_targets_mean": 3634.1, + "valid_targets_min": 895 + }, + { + "epoch": 2.672, + "grad_norm": 0.5440227120286691, + "learning_rate": 2.111662840511373e-05, + "loss": 0.3516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1294553130865097, + "step": 1670, + "valid_targets_mean": 2321.6, + "valid_targets_min": 661 + }, + { + "epoch": 2.68, + "grad_norm": 0.49077600939023164, + "learning_rate": 2.1005064891275638e-05, + "loss": 0.3567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14773976802825928, + "step": 1675, + "valid_targets_mean": 4320.6, + "valid_targets_min": 886 + }, + { + "epoch": 2.6879999999999997, + "grad_norm": 0.391281755034438, + "learning_rate": 2.0893470015557126e-05, + "loss": 0.3396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10046662390232086, + "step": 1680, + "valid_targets_mean": 5169.5, + "valid_targets_min": 982 + }, + { + "epoch": 2.6959999999999997, + "grad_norm": 0.45801761659477147, + "learning_rate": 2.078184726014643e-05, + "loss": 0.3712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1757431924343109, + "step": 1685, + "valid_targets_mean": 4211.1, + "valid_targets_min": 1628 + }, + { + "epoch": 2.7039999999999997, + "grad_norm": 0.4549840031695572, + "learning_rate": 2.0670200108101754e-05, + "loss": 0.3328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1375683844089508, + "step": 1690, + "valid_targets_mean": 3498.6, + "valid_targets_min": 1023 + }, + { + "epoch": 2.7119999999999997, + "grad_norm": 0.4915990617682205, + "learning_rate": 2.0558532043242557e-05, + "loss": 0.3437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0870099812746048, + "step": 1695, + "valid_targets_mean": 3573.2, + "valid_targets_min": 574 + }, + { + "epoch": 2.7199999999999998, + "grad_norm": 0.5924797281596321, + "learning_rate": 2.0446846550040863e-05, + "loss": 0.3685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22033245861530304, + "step": 1700, + "valid_targets_mean": 2877.0, + "valid_targets_min": 854 + }, + { + "epoch": 2.7279999999999998, + "grad_norm": 0.43877107974753327, + "learning_rate": 2.033514711351253e-05, + "loss": 0.3527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15773963928222656, + "step": 1705, + "valid_targets_mean": 4485.2, + "valid_targets_min": 941 + }, + { + "epoch": 2.7359999999999998, + "grad_norm": 0.530664944625799, + "learning_rate": 2.022343721910851e-05, + "loss": 0.3421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2027740180492401, + "step": 1710, + "valid_targets_mean": 4494.0, + "valid_targets_min": 1226 + }, + { + "epoch": 2.7439999999999998, + "grad_norm": 0.6038027202052175, + "learning_rate": 2.0111720352606054e-05, + "loss": 0.3601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19138199090957642, + "step": 1715, + "valid_targets_mean": 3350.9, + "valid_targets_min": 1106 + }, + { + "epoch": 2.752, + "grad_norm": 0.48781174342181044, + "learning_rate": 2e-05, + "loss": 0.3113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19703620672225952, + "step": 1720, + "valid_targets_mean": 5906.1, + "valid_targets_min": 1657 + }, + { + "epoch": 2.76, + "grad_norm": 0.4902966583040599, + "learning_rate": 1.988827964739395e-05, + "loss": 0.3293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12021969258785248, + "step": 1725, + "valid_targets_mean": 2399.0, + "valid_targets_min": 897 + }, + { + "epoch": 2.768, + "grad_norm": 0.47146153893588105, + "learning_rate": 1.9776562780891494e-05, + "loss": 0.3446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14058835804462433, + "step": 1730, + "valid_targets_mean": 2738.2, + "valid_targets_min": 1074 + }, + { + "epoch": 2.776, + "grad_norm": 0.5388251301985688, + "learning_rate": 1.966485288648747e-05, + "loss": 0.3284, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16873209178447723, + "step": 1735, + "valid_targets_mean": 3595.6, + "valid_targets_min": 844 + }, + { + "epoch": 2.784, + "grad_norm": 0.5085083642233609, + "learning_rate": 1.9553153449959144e-05, + "loss": 0.3548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17369981110095978, + "step": 1740, + "valid_targets_mean": 4477.5, + "valid_targets_min": 905 + }, + { + "epoch": 2.792, + "grad_norm": 0.49910923546702074, + "learning_rate": 1.9441467956757453e-05, + "loss": 0.3631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12397250533103943, + "step": 1745, + "valid_targets_mean": 2318.6, + "valid_targets_min": 868 + }, + { + "epoch": 2.8, + "grad_norm": 0.48862405925011343, + "learning_rate": 1.9329799891898256e-05, + "loss": 0.3353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23336371779441833, + "step": 1750, + "valid_targets_mean": 4532.2, + "valid_targets_min": 1037 + }, + { + "epoch": 2.808, + "grad_norm": 0.39497314431233704, + "learning_rate": 1.9218152739853576e-05, + "loss": 0.3471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1946973204612732, + "step": 1755, + "valid_targets_mean": 5582.1, + "valid_targets_min": 710 + }, + { + "epoch": 2.816, + "grad_norm": 0.5003296582395104, + "learning_rate": 1.9106529984442884e-05, + "loss": 0.3275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2435501217842102, + "step": 1760, + "valid_targets_mean": 5183.2, + "valid_targets_min": 1061 + }, + { + "epoch": 2.824, + "grad_norm": 0.52781859107085, + "learning_rate": 1.8994935108724366e-05, + "loss": 0.345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24607789516448975, + "step": 1765, + "valid_targets_mean": 4977.9, + "valid_targets_min": 668 + }, + { + "epoch": 2.832, + "grad_norm": 0.4870668187855425, + "learning_rate": 1.8883371594886276e-05, + "loss": 0.3378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17322807013988495, + "step": 1770, + "valid_targets_mean": 4143.9, + "valid_targets_min": 721 + }, + { + "epoch": 2.84, + "grad_norm": 0.5501480651217011, + "learning_rate": 1.877184292413824e-05, + "loss": 0.3385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1610127091407776, + "step": 1775, + "valid_targets_mean": 2323.1, + "valid_targets_min": 760 + }, + { + "epoch": 2.848, + "grad_norm": 0.5665295596550164, + "learning_rate": 1.8660352576602663e-05, + "loss": 0.344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23653550446033478, + "step": 1780, + "valid_targets_mean": 4046.8, + "valid_targets_min": 559 + }, + { + "epoch": 2.856, + "grad_norm": 0.47013239118348343, + "learning_rate": 1.8548904031206102e-05, + "loss": 0.3472, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20719118416309357, + "step": 1785, + "valid_targets_mean": 5973.1, + "valid_targets_min": 824 + }, + { + "epoch": 2.864, + "grad_norm": 0.4773248045425617, + "learning_rate": 1.843750076557072e-05, + "loss": 0.3382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18604245781898499, + "step": 1790, + "valid_targets_mean": 5011.6, + "valid_targets_min": 1589 + }, + { + "epoch": 2.872, + "grad_norm": 0.7432332329422374, + "learning_rate": 1.832614625590581e-05, + "loss": 0.3387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20564137399196625, + "step": 1795, + "valid_targets_mean": 2076.8, + "valid_targets_min": 701 + }, + { + "epoch": 2.88, + "grad_norm": 0.5780630575919011, + "learning_rate": 1.8214843976899264e-05, + "loss": 0.3475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.168882817029953, + "step": 1800, + "valid_targets_mean": 2690.0, + "valid_targets_min": 973 + }, + { + "epoch": 2.888, + "grad_norm": 0.4048713565752578, + "learning_rate": 1.810359740160919e-05, + "loss": 0.3294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11568081378936768, + "step": 1805, + "valid_targets_mean": 4304.0, + "valid_targets_min": 1228 + }, + { + "epoch": 2.896, + "grad_norm": 0.5039084001370419, + "learning_rate": 1.7992410001355515e-05, + "loss": 0.3471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1639002561569214, + "step": 1810, + "valid_targets_mean": 3705.6, + "valid_targets_min": 678 + }, + { + "epoch": 2.904, + "grad_norm": 0.8029006281339147, + "learning_rate": 1.788128524561168e-05, + "loss": 0.342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15965096652507782, + "step": 1815, + "valid_targets_mean": 1893.2, + "valid_targets_min": 908 + }, + { + "epoch": 2.912, + "grad_norm": 0.45687805144339416, + "learning_rate": 1.7770226601896397e-05, + "loss": 0.3296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1370457112789154, + "step": 1820, + "valid_targets_mean": 3719.6, + "valid_targets_min": 1436 + }, + { + "epoch": 2.92, + "grad_norm": 0.4584149777451423, + "learning_rate": 1.7659237535665404e-05, + "loss": 0.3318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18669384717941284, + "step": 1825, + "valid_targets_mean": 4329.4, + "valid_targets_min": 792 + }, + { + "epoch": 2.928, + "grad_norm": 0.3857893662873705, + "learning_rate": 1.754832151020337e-05, + "loss": 0.3181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17508453130722046, + "step": 1830, + "valid_targets_mean": 6344.2, + "valid_targets_min": 807 + }, + { + "epoch": 2.936, + "grad_norm": 0.5909250437931737, + "learning_rate": 1.74374819865158e-05, + "loss": 0.318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1460743248462677, + "step": 1835, + "valid_targets_mean": 2246.0, + "valid_targets_min": 1028 + }, + { + "epoch": 2.944, + "grad_norm": 0.6168729718884929, + "learning_rate": 1.7326722423221057e-05, + "loss": 0.3609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16383513808250427, + "step": 1840, + "valid_targets_mean": 2735.9, + "valid_targets_min": 1176 + }, + { + "epoch": 2.952, + "grad_norm": 0.35833838993310363, + "learning_rate": 1.7216046276442438e-05, + "loss": 0.3415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1925455927848816, + "step": 1845, + "valid_targets_mean": 8259.6, + "valid_targets_min": 1127 + }, + { + "epoch": 2.96, + "grad_norm": 0.5568115528689965, + "learning_rate": 1.7105456999700306e-05, + "loss": 0.3607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18549929559230804, + "step": 1850, + "valid_targets_mean": 3148.5, + "valid_targets_min": 829 + }, + { + "epoch": 2.968, + "grad_norm": 0.44855376408233205, + "learning_rate": 1.6994958043804374e-05, + "loss": 0.3437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14252299070358276, + "step": 1855, + "valid_targets_mean": 3142.0, + "valid_targets_min": 1217 + }, + { + "epoch": 2.976, + "grad_norm": 0.4728038820881367, + "learning_rate": 1.6884552856745972e-05, + "loss": 0.3444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10633358359336853, + "step": 1860, + "valid_targets_mean": 1938.1, + "valid_targets_min": 506 + }, + { + "epoch": 2.984, + "grad_norm": 0.6465001523766998, + "learning_rate": 1.6774244883590503e-05, + "loss": 0.3675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1333983838558197, + "step": 1865, + "valid_targets_mean": 1810.0, + "valid_targets_min": 721 + }, + { + "epoch": 2.992, + "grad_norm": 0.42614834103402127, + "learning_rate": 1.6664037566369905e-05, + "loss": 0.3441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14628413319587708, + "step": 1870, + "valid_targets_mean": 5585.8, + "valid_targets_min": 1323 + }, + { + "epoch": 3.0, + "grad_norm": 0.5627282905947615, + "learning_rate": 1.6553934343975273e-05, + "loss": 0.365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25714242458343506, + "step": 1875, + "valid_targets_mean": 4970.8, + "valid_targets_min": 1718 + }, + { + "epoch": 3.008, + "grad_norm": 0.5335261046503706, + "learning_rate": 1.644393865204955e-05, + "loss": 0.3588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12273615598678589, + "step": 1880, + "valid_targets_mean": 3116.5, + "valid_targets_min": 1219 + }, + { + "epoch": 3.016, + "grad_norm": 1.132349702215561, + "learning_rate": 1.6334053922880304e-05, + "loss": 0.3097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12334040552377701, + "step": 1885, + "valid_targets_mean": 5509.8, + "valid_targets_min": 1485 + }, + { + "epoch": 3.024, + "grad_norm": 0.44315501434081866, + "learning_rate": 1.622428358529265e-05, + "loss": 0.3306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1480335295200348, + "step": 1890, + "valid_targets_mean": 3953.9, + "valid_targets_min": 596 + }, + { + "epoch": 3.032, + "grad_norm": 0.588510096981289, + "learning_rate": 1.611463106454224e-05, + "loss": 0.3392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23434148728847504, + "step": 1895, + "valid_targets_mean": 4087.4, + "valid_targets_min": 1260 + }, + { + "epoch": 3.04, + "grad_norm": 0.42621894302985963, + "learning_rate": 1.6005099782208392e-05, + "loss": 0.3477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18396449089050293, + "step": 1900, + "valid_targets_mean": 5456.5, + "valid_targets_min": 2731 + }, + { + "epoch": 3.048, + "grad_norm": 0.41823653199244526, + "learning_rate": 1.5895693156087317e-05, + "loss": 0.3079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19087563455104828, + "step": 1905, + "valid_targets_mean": 6455.0, + "valid_targets_min": 1013 + }, + { + "epoch": 3.056, + "grad_norm": 0.4782619422592544, + "learning_rate": 1.578641460008548e-05, + "loss": 0.3172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1430778205394745, + "step": 1910, + "valid_targets_mean": 3750.5, + "valid_targets_min": 725 + }, + { + "epoch": 3.064, + "grad_norm": 0.4189615311261732, + "learning_rate": 1.5677267524113054e-05, + "loss": 0.3169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15594616532325745, + "step": 1915, + "valid_targets_mean": 6085.0, + "valid_targets_min": 1033 + }, + { + "epoch": 3.072, + "grad_norm": 0.4552341804156781, + "learning_rate": 1.5568255333977547e-05, + "loss": 0.2908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1645529717206955, + "step": 1920, + "valid_targets_mean": 4930.0, + "valid_targets_min": 1100 + }, + { + "epoch": 3.08, + "grad_norm": 0.4123722916613195, + "learning_rate": 1.5459381431277506e-05, + "loss": 0.326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10328075289726257, + "step": 1925, + "valid_targets_mean": 4879.5, + "valid_targets_min": 886 + }, + { + "epoch": 3.088, + "grad_norm": 0.49773690652553637, + "learning_rate": 1.5350649213296373e-05, + "loss": 0.3312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1240905225276947, + "step": 1930, + "valid_targets_mean": 2668.1, + "valid_targets_min": 936 + }, + { + "epoch": 3.096, + "grad_norm": 0.45278449933874565, + "learning_rate": 1.5242062072896483e-05, + "loss": 0.3371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13505850732326508, + "step": 1935, + "valid_targets_mean": 3849.6, + "valid_targets_min": 845 + }, + { + "epoch": 3.104, + "grad_norm": 0.3913986266762585, + "learning_rate": 1.5133623398413209e-05, + "loss": 0.3189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1345825046300888, + "step": 1940, + "valid_targets_mean": 4995.1, + "valid_targets_min": 1052 + }, + { + "epoch": 3.112, + "grad_norm": 0.6573102578659725, + "learning_rate": 1.50253365735492e-05, + "loss": 0.3329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13540002703666687, + "step": 1945, + "valid_targets_mean": 1855.5, + "valid_targets_min": 617 + }, + { + "epoch": 3.12, + "grad_norm": 0.5353571868182, + "learning_rate": 1.4917204977268833e-05, + "loss": 0.3487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10793092101812363, + "step": 1950, + "valid_targets_mean": 2389.6, + "valid_targets_min": 499 + }, + { + "epoch": 3.128, + "grad_norm": 0.45964730669624526, + "learning_rate": 1.4809231983692733e-05, + "loss": 0.3234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18516826629638672, + "step": 1955, + "valid_targets_mean": 6327.5, + "valid_targets_min": 1400 + }, + { + "epoch": 3.136, + "grad_norm": 0.5326551897217631, + "learning_rate": 1.4701420961992533e-05, + "loss": 0.309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11213398724794388, + "step": 1960, + "valid_targets_mean": 2593.1, + "valid_targets_min": 638 + }, + { + "epoch": 3.144, + "grad_norm": 0.5727183007723465, + "learning_rate": 1.459377527628571e-05, + "loss": 0.3177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0960133746266365, + "step": 1965, + "valid_targets_mean": 1934.0, + "valid_targets_min": 576 + }, + { + "epoch": 3.152, + "grad_norm": 0.5747836643926282, + "learning_rate": 1.4486298285530634e-05, + "loss": 0.3613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2422075867652893, + "step": 1970, + "valid_targets_mean": 4018.4, + "valid_targets_min": 722 + }, + { + "epoch": 3.16, + "grad_norm": 0.4200083023418681, + "learning_rate": 1.4378993343421736e-05, + "loss": 0.3153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13149552047252655, + "step": 1975, + "valid_targets_mean": 4988.6, + "valid_targets_min": 1289 + }, + { + "epoch": 3.168, + "grad_norm": 0.5127779132992235, + "learning_rate": 1.4271863798284877e-05, + "loss": 0.3095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12630169093608856, + "step": 1980, + "valid_targets_mean": 2393.5, + "valid_targets_min": 690 + }, + { + "epoch": 3.176, + "grad_norm": 0.5792388346676679, + "learning_rate": 1.4164912992972846e-05, + "loss": 0.3446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16788384318351746, + "step": 1985, + "valid_targets_mean": 3161.4, + "valid_targets_min": 829 + }, + { + "epoch": 3.184, + "grad_norm": 0.5717202482455881, + "learning_rate": 1.4058144264761087e-05, + "loss": 0.2997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19649925827980042, + "step": 1990, + "valid_targets_mean": 4664.4, + "valid_targets_min": 955 + }, + { + "epoch": 3.192, + "grad_norm": 0.6151627913778978, + "learning_rate": 1.3951560945243517e-05, + "loss": 0.3316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16047929227352142, + "step": 1995, + "valid_targets_mean": 2772.8, + "valid_targets_min": 793 + }, + { + "epoch": 3.2, + "grad_norm": 0.4242933168481296, + "learning_rate": 1.3845166360228597e-05, + "loss": 0.3652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19517377018928528, + "step": 2000, + "valid_targets_mean": 6388.5, + "valid_targets_min": 495 + }, + { + "epoch": 3.208, + "grad_norm": 0.5003469604574386, + "learning_rate": 1.3738963829635559e-05, + "loss": 0.3156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16463890671730042, + "step": 2005, + "valid_targets_mean": 4910.6, + "valid_targets_min": 833 + }, + { + "epoch": 3.216, + "grad_norm": 0.9508204216990079, + "learning_rate": 1.3632956667390784e-05, + "loss": 0.2938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14386102557182312, + "step": 2010, + "valid_targets_mean": 3383.9, + "valid_targets_min": 1221 + }, + { + "epoch": 3.224, + "grad_norm": 0.47315611427798243, + "learning_rate": 1.3527148181324425e-05, + "loss": 0.3215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1463852822780609, + "step": 2015, + "valid_targets_mean": 4139.1, + "valid_targets_min": 881 + }, + { + "epoch": 3.232, + "grad_norm": 0.4910524255424862, + "learning_rate": 1.3421541673067168e-05, + "loss": 0.3058, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19467991590499878, + "step": 2020, + "valid_targets_mean": 5570.2, + "valid_targets_min": 1334 + }, + { + "epoch": 3.24, + "grad_norm": 0.42714613992182937, + "learning_rate": 1.3316140437947207e-05, + "loss": 0.2985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12120620906352997, + "step": 2025, + "valid_targets_mean": 4442.6, + "valid_targets_min": 704 + }, + { + "epoch": 3.248, + "grad_norm": 0.5255009932566614, + "learning_rate": 1.321094776488745e-05, + "loss": 0.3184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1309448778629303, + "step": 2030, + "valid_targets_mean": 3026.9, + "valid_targets_min": 999 + }, + { + "epoch": 3.2560000000000002, + "grad_norm": 0.530757098061631, + "learning_rate": 1.3105966936302856e-05, + "loss": 0.3272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1335388720035553, + "step": 2035, + "valid_targets_mean": 3075.5, + "valid_targets_min": 1200 + }, + { + "epoch": 3.2640000000000002, + "grad_norm": 0.4322808474453205, + "learning_rate": 1.3001201227998023e-05, + "loss": 0.3245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17669960856437683, + "step": 2040, + "valid_targets_mean": 8023.5, + "valid_targets_min": 574 + }, + { + "epoch": 3.2720000000000002, + "grad_norm": 0.45545029411121973, + "learning_rate": 1.2896653909064964e-05, + "loss": 0.3091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12387904524803162, + "step": 2045, + "valid_targets_mean": 4796.9, + "valid_targets_min": 944 + }, + { + "epoch": 3.2800000000000002, + "grad_norm": 0.4647985673919204, + "learning_rate": 1.2792328241781124e-05, + "loss": 0.3246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13686639070510864, + "step": 2050, + "valid_targets_mean": 4213.1, + "valid_targets_min": 430 + }, + { + "epoch": 3.288, + "grad_norm": 0.638531208310754, + "learning_rate": 1.2688227481507546e-05, + "loss": 0.3168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12405982613563538, + "step": 2055, + "valid_targets_mean": 3198.2, + "valid_targets_min": 834 + }, + { + "epoch": 3.296, + "grad_norm": 0.3881187620470557, + "learning_rate": 1.258435487658733e-05, + "loss": 0.3283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14476531744003296, + "step": 2060, + "valid_targets_mean": 4399.9, + "valid_targets_min": 971 + }, + { + "epoch": 3.304, + "grad_norm": 0.4425558771523959, + "learning_rate": 1.2480713668244243e-05, + "loss": 0.3007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14966867864131927, + "step": 2065, + "valid_targets_mean": 5938.1, + "valid_targets_min": 1014 + }, + { + "epoch": 3.312, + "grad_norm": 0.47898598677849, + "learning_rate": 1.2377307090481586e-05, + "loss": 0.3288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13591575622558594, + "step": 2070, + "valid_targets_mean": 4385.5, + "valid_targets_min": 721 + }, + { + "epoch": 3.32, + "grad_norm": 0.6460674905120861, + "learning_rate": 1.2274138369981298e-05, + "loss": 0.3089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23174312710762024, + "step": 2075, + "valid_targets_mean": 3097.0, + "valid_targets_min": 630 + }, + { + "epoch": 3.328, + "grad_norm": 0.42734464278344975, + "learning_rate": 1.2171210726003256e-05, + "loss": 0.3276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16050082445144653, + "step": 2080, + "valid_targets_mean": 5419.9, + "valid_targets_min": 867 + }, + { + "epoch": 3.336, + "grad_norm": 0.47895472971752917, + "learning_rate": 1.2068527370284815e-05, + "loss": 0.3176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16464433073997498, + "step": 2085, + "valid_targets_mean": 3824.9, + "valid_targets_min": 1169 + }, + { + "epoch": 3.344, + "grad_norm": 0.4240551447146959, + "learning_rate": 1.1966091506940616e-05, + "loss": 0.3274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11462630331516266, + "step": 2090, + "valid_targets_mean": 4184.4, + "valid_targets_min": 832 + }, + { + "epoch": 3.352, + "grad_norm": 0.7394655211812231, + "learning_rate": 1.1863906332362569e-05, + "loss": 0.3628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1140076294541359, + "step": 2095, + "valid_targets_mean": 2578.2, + "valid_targets_min": 786 + }, + { + "epoch": 3.36, + "grad_norm": 0.41922280281116975, + "learning_rate": 1.176197503512015e-05, + "loss": 0.3148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.159539595246315, + "step": 2100, + "valid_targets_mean": 5751.0, + "valid_targets_min": 808 + }, + { + "epoch": 3.368, + "grad_norm": 0.5584401926293026, + "learning_rate": 1.1660300795860877e-05, + "loss": 0.3253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1747303605079651, + "step": 2105, + "valid_targets_mean": 2999.6, + "valid_targets_min": 834 + }, + { + "epoch": 3.376, + "grad_norm": 0.4166954589757251, + "learning_rate": 1.1558886787211071e-05, + "loss": 0.2865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1294165849685669, + "step": 2110, + "valid_targets_mean": 4363.6, + "valid_targets_min": 546 + }, + { + "epoch": 3.384, + "grad_norm": 0.5662294561036999, + "learning_rate": 1.1457736173676883e-05, + "loss": 0.3209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12514197826385498, + "step": 2115, + "valid_targets_mean": 2388.6, + "valid_targets_min": 974 + }, + { + "epoch": 3.392, + "grad_norm": 0.5059434791101876, + "learning_rate": 1.1356852111545493e-05, + "loss": 0.3481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16026899218559265, + "step": 2120, + "valid_targets_mean": 3593.6, + "valid_targets_min": 1325 + }, + { + "epoch": 3.4, + "grad_norm": 0.5844883650373187, + "learning_rate": 1.1256237748786675e-05, + "loss": 0.3326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22520673274993896, + "step": 2125, + "valid_targets_mean": 3685.5, + "valid_targets_min": 379 + }, + { + "epoch": 3.408, + "grad_norm": 0.5009787440262468, + "learning_rate": 1.1155896224954543e-05, + "loss": 0.3357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12661316990852356, + "step": 2130, + "valid_targets_mean": 3547.5, + "valid_targets_min": 816 + }, + { + "epoch": 3.416, + "grad_norm": 0.48678971220314976, + "learning_rate": 1.1055830671089578e-05, + "loss": 0.3183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16513441503047943, + "step": 2135, + "valid_targets_mean": 5887.6, + "valid_targets_min": 814 + }, + { + "epoch": 3.424, + "grad_norm": 0.5206971097737679, + "learning_rate": 1.0956044209620966e-05, + "loss": 0.3357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13408246636390686, + "step": 2140, + "valid_targets_mean": 3378.6, + "valid_targets_min": 598 + }, + { + "epoch": 3.432, + "grad_norm": 0.47966318102705424, + "learning_rate": 1.0856539954269121e-05, + "loss": 0.323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16585497558116913, + "step": 2145, + "valid_targets_mean": 4378.1, + "valid_targets_min": 798 + }, + { + "epoch": 3.44, + "grad_norm": 0.5122761624297089, + "learning_rate": 1.0757321009948543e-05, + "loss": 0.3438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12634073197841644, + "step": 2150, + "valid_targets_mean": 3119.5, + "valid_targets_min": 975 + }, + { + "epoch": 3.448, + "grad_norm": 0.6182750943275146, + "learning_rate": 1.0658390472670938e-05, + "loss": 0.327, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1473451852798462, + "step": 2155, + "valid_targets_mean": 2101.5, + "valid_targets_min": 843 + }, + { + "epoch": 3.456, + "grad_norm": 0.4888023729480413, + "learning_rate": 1.0559751429448597e-05, + "loss": 0.3408, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21524950861930847, + "step": 2160, + "valid_targets_mean": 6050.9, + "valid_targets_min": 634 + }, + { + "epoch": 3.464, + "grad_norm": 0.4542480312052559, + "learning_rate": 1.0461406958198101e-05, + "loss": 0.321, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12608930468559265, + "step": 2165, + "valid_targets_mean": 4816.9, + "valid_targets_min": 879 + }, + { + "epoch": 3.472, + "grad_norm": 0.5112301560829176, + "learning_rate": 1.0363360127644235e-05, + "loss": 0.3444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21885031461715698, + "step": 2170, + "valid_targets_mean": 6240.8, + "valid_targets_min": 1414 + }, + { + "epoch": 3.48, + "grad_norm": 0.5442050920516187, + "learning_rate": 1.0265613997224255e-05, + "loss": 0.3139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1261734962463379, + "step": 2175, + "valid_targets_mean": 3223.0, + "valid_targets_min": 902 + }, + { + "epoch": 3.488, + "grad_norm": 0.4610573972043477, + "learning_rate": 1.0168171616992422e-05, + "loss": 0.3175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19308221340179443, + "step": 2180, + "valid_targets_mean": 6308.8, + "valid_targets_min": 823 + }, + { + "epoch": 3.496, + "grad_norm": 0.5306677534892053, + "learning_rate": 1.007103602752483e-05, + "loss": 0.3402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1896430402994156, + "step": 2185, + "valid_targets_mean": 3996.8, + "valid_targets_min": 671 + }, + { + "epoch": 3.504, + "grad_norm": 0.5044165875993919, + "learning_rate": 9.974210259824505e-06, + "loss": 0.3336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16915544867515564, + "step": 2190, + "valid_targets_mean": 3629.6, + "valid_targets_min": 1033 + }, + { + "epoch": 3.512, + "grad_norm": 0.6879187845686496, + "learning_rate": 9.877697335226872e-06, + "loss": 0.3439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10717498511075974, + "step": 2195, + "valid_targets_mean": 2186.5, + "valid_targets_min": 687 + }, + { + "epoch": 3.52, + "grad_norm": 0.6250839989818192, + "learning_rate": 9.781500265305448e-06, + "loss": 0.3357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1784055531024933, + "step": 2200, + "valid_targets_mean": 3010.9, + "valid_targets_min": 720 + }, + { + "epoch": 3.528, + "grad_norm": 0.5248629910937874, + "learning_rate": 9.685622051777856e-06, + "loss": 0.3092, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23266291618347168, + "step": 2205, + "valid_targets_mean": 5086.4, + "valid_targets_min": 787 + }, + { + "epoch": 3.536, + "grad_norm": 0.4834179595767226, + "learning_rate": 9.590065686412182e-06, + "loss": 0.3233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13310745358467102, + "step": 2210, + "valid_targets_mean": 2614.6, + "valid_targets_min": 925 + }, + { + "epoch": 3.544, + "grad_norm": 0.45602768383743014, + "learning_rate": 9.494834150933616e-06, + "loss": 0.3297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1372455358505249, + "step": 2215, + "valid_targets_mean": 3843.4, + "valid_targets_min": 1099 + }, + { + "epoch": 3.552, + "grad_norm": 0.4887447107244825, + "learning_rate": 9.399930416931404e-06, + "loss": 0.314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10885452479124069, + "step": 2220, + "valid_targets_mean": 3725.2, + "valid_targets_min": 872 + }, + { + "epoch": 3.56, + "grad_norm": 0.5151465841953303, + "learning_rate": 9.30535744576615e-06, + "loss": 0.3353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13782528042793274, + "step": 2225, + "valid_targets_mean": 3750.6, + "valid_targets_min": 1124 + }, + { + "epoch": 3.568, + "grad_norm": 0.5623056848959576, + "learning_rate": 9.211118188477362e-06, + "loss": 0.3311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2320508360862732, + "step": 2230, + "valid_targets_mean": 4508.8, + "valid_targets_min": 662 + }, + { + "epoch": 3.576, + "grad_norm": 0.5178467063324498, + "learning_rate": 9.117215585691408e-06, + "loss": 0.3228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17334342002868652, + "step": 2235, + "valid_targets_mean": 4636.2, + "valid_targets_min": 916 + }, + { + "epoch": 3.584, + "grad_norm": 0.6120278120276946, + "learning_rate": 9.023652567529744e-06, + "loss": 0.3263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11503614485263824, + "step": 2240, + "valid_targets_mean": 2788.6, + "valid_targets_min": 588 + }, + { + "epoch": 3.592, + "grad_norm": 0.4937083479890421, + "learning_rate": 8.930432053517465e-06, + "loss": 0.3191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11475160717964172, + "step": 2245, + "valid_targets_mean": 2826.0, + "valid_targets_min": 957 + }, + { + "epoch": 3.6, + "grad_norm": 0.49253074619254694, + "learning_rate": 8.837556952492264e-06, + "loss": 0.3207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14227665960788727, + "step": 2250, + "valid_targets_mean": 4518.2, + "valid_targets_min": 925 + }, + { + "epoch": 3.608, + "grad_norm": 0.7363097537758285, + "learning_rate": 8.745030162513582e-06, + "loss": 0.3292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21330423653125763, + "step": 2255, + "valid_targets_mean": 3404.9, + "valid_targets_min": 874 + }, + { + "epoch": 3.616, + "grad_norm": 0.4414100721223384, + "learning_rate": 8.652854570772236e-06, + "loss": 0.3232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12081056833267212, + "step": 2260, + "valid_targets_mean": 4392.4, + "valid_targets_min": 984 + }, + { + "epoch": 3.624, + "grad_norm": 0.5606075707240097, + "learning_rate": 8.561033053500312e-06, + "loss": 0.3213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08863774687051773, + "step": 2265, + "valid_targets_mean": 1930.2, + "valid_targets_min": 685 + }, + { + "epoch": 3.632, + "grad_norm": 0.47128304721158626, + "learning_rate": 8.46956847588141e-06, + "loss": 0.3523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12015935778617859, + "step": 2270, + "valid_targets_mean": 3377.5, + "valid_targets_min": 815 + }, + { + "epoch": 3.64, + "grad_norm": 0.5125340858044971, + "learning_rate": 8.378463691961237e-06, + "loss": 0.2898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14381183683872223, + "step": 2275, + "valid_targets_mean": 3659.1, + "valid_targets_min": 491 + }, + { + "epoch": 3.648, + "grad_norm": 0.4592889282450072, + "learning_rate": 8.287721544558574e-06, + "loss": 0.3185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10181337594985962, + "step": 2280, + "valid_targets_mean": 3632.9, + "valid_targets_min": 728 + }, + { + "epoch": 3.656, + "grad_norm": 0.5965684017173326, + "learning_rate": 8.197344865176548e-06, + "loss": 0.3197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12914949655532837, + "step": 2285, + "valid_targets_mean": 3181.9, + "valid_targets_min": 1078 + }, + { + "epoch": 3.664, + "grad_norm": 0.6228428043708024, + "learning_rate": 8.10733647391427e-06, + "loss": 0.355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1971237063407898, + "step": 2290, + "valid_targets_mean": 3335.1, + "valid_targets_min": 791 + }, + { + "epoch": 3.672, + "grad_norm": 0.5979530685752266, + "learning_rate": 8.017699179378849e-06, + "loss": 0.3396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22264941036701202, + "step": 2295, + "valid_targets_mean": 3038.4, + "valid_targets_min": 830 + }, + { + "epoch": 3.68, + "grad_norm": 0.4458519642564752, + "learning_rate": 7.928435778597763e-06, + "loss": 0.3209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1323288083076477, + "step": 2300, + "valid_targets_mean": 3766.8, + "valid_targets_min": 874 + }, + { + "epoch": 3.6879999999999997, + "grad_norm": 0.5205119846178907, + "learning_rate": 7.839549056931557e-06, + "loss": 0.3348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16188280284404755, + "step": 2305, + "valid_targets_mean": 3806.9, + "valid_targets_min": 1001 + }, + { + "epoch": 3.6959999999999997, + "grad_norm": 0.6321612059563577, + "learning_rate": 7.751041787986965e-06, + "loss": 0.3147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20028778910636902, + "step": 2310, + "valid_targets_mean": 5492.1, + "valid_targets_min": 905 + }, + { + "epoch": 3.7039999999999997, + "grad_norm": 0.4856071600672378, + "learning_rate": 7.662916733530317e-06, + "loss": 0.2987, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20045016705989838, + "step": 2315, + "valid_targets_mean": 5160.9, + "valid_targets_min": 1459 + }, + { + "epoch": 3.7119999999999997, + "grad_norm": 0.6190037743730318, + "learning_rate": 7.575176643401394e-06, + "loss": 0.3144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19084815680980682, + "step": 2320, + "valid_targets_mean": 3074.1, + "valid_targets_min": 740 + }, + { + "epoch": 3.7199999999999998, + "grad_norm": 0.47802774359013706, + "learning_rate": 7.487824255427616e-06, + "loss": 0.3462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2085719108581543, + "step": 2325, + "valid_targets_mean": 5742.2, + "valid_targets_min": 1804 + }, + { + "epoch": 3.7279999999999998, + "grad_norm": 0.6141982017666223, + "learning_rate": 7.400862295338595e-06, + "loss": 0.319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1338297575712204, + "step": 2330, + "valid_targets_mean": 3026.8, + "valid_targets_min": 869 + }, + { + "epoch": 3.7359999999999998, + "grad_norm": 0.4272699620447183, + "learning_rate": 7.314293476681122e-06, + "loss": 0.3269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12799906730651855, + "step": 2335, + "valid_targets_mean": 5400.1, + "valid_targets_min": 920 + }, + { + "epoch": 3.7439999999999998, + "grad_norm": 0.558531390187902, + "learning_rate": 7.228120500734443e-06, + "loss": 0.3094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1416444331407547, + "step": 2340, + "valid_targets_mean": 3904.5, + "valid_targets_min": 665 + }, + { + "epoch": 3.752, + "grad_norm": 0.5957360903717863, + "learning_rate": 7.1423460564259995e-06, + "loss": 0.3211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1542271375656128, + "step": 2345, + "valid_targets_mean": 2498.1, + "valid_targets_min": 725 + }, + { + "epoch": 3.76, + "grad_norm": 0.5749982706805087, + "learning_rate": 7.056972820247516e-06, + "loss": 0.3403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24424612522125244, + "step": 2350, + "valid_targets_mean": 4599.0, + "valid_targets_min": 586 + }, + { + "epoch": 3.768, + "grad_norm": 0.7004621373727494, + "learning_rate": 6.97200345617149e-06, + "loss": 0.3132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.261954128742218, + "step": 2355, + "valid_targets_mean": 3538.9, + "valid_targets_min": 911 + }, + { + "epoch": 3.776, + "grad_norm": 0.4631600705700879, + "learning_rate": 6.887440615568044e-06, + "loss": 0.3387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13582396507263184, + "step": 2360, + "valid_targets_mean": 4712.9, + "valid_targets_min": 1222 + }, + { + "epoch": 3.784, + "grad_norm": 0.5495931240245518, + "learning_rate": 6.803286937122233e-06, + "loss": 0.3371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23861850798130035, + "step": 2365, + "valid_targets_mean": 4753.6, + "valid_targets_min": 980 + }, + { + "epoch": 3.792, + "grad_norm": 0.6024280451059767, + "learning_rate": 6.719545046751674e-06, + "loss": 0.3315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21030789613723755, + "step": 2370, + "valid_targets_mean": 2878.9, + "valid_targets_min": 895 + }, + { + "epoch": 3.8, + "grad_norm": 0.6147492284481915, + "learning_rate": 6.636217557524605e-06, + "loss": 0.3522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.161615788936615, + "step": 2375, + "valid_targets_mean": 2665.0, + "valid_targets_min": 634 + }, + { + "epoch": 3.808, + "grad_norm": 0.5623305812954921, + "learning_rate": 6.55330706957837e-06, + "loss": 0.3327, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2057649940252304, + "step": 2380, + "valid_targets_mean": 4465.9, + "valid_targets_min": 1127 + }, + { + "epoch": 3.816, + "grad_norm": 0.5283338376179509, + "learning_rate": 6.4708161700382655e-06, + "loss": 0.3246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1588209867477417, + "step": 2385, + "valid_targets_mean": 3723.0, + "valid_targets_min": 807 + }, + { + "epoch": 3.824, + "grad_norm": 0.4510470301147515, + "learning_rate": 6.388747432936819e-06, + "loss": 0.3195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12806157767772675, + "step": 2390, + "valid_targets_mean": 3841.9, + "valid_targets_min": 981 + }, + { + "epoch": 3.832, + "grad_norm": 0.5436971472039873, + "learning_rate": 6.3071034191334915e-06, + "loss": 0.3228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1942981779575348, + "step": 2395, + "valid_targets_mean": 3798.4, + "valid_targets_min": 732 + }, + { + "epoch": 3.84, + "grad_norm": 0.5018428572701472, + "learning_rate": 6.22588667623472e-06, + "loss": 0.3207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13997101783752441, + "step": 2400, + "valid_targets_mean": 3812.2, + "valid_targets_min": 708 + }, + { + "epoch": 3.848, + "grad_norm": 0.582055372382792, + "learning_rate": 6.145099738514466e-06, + "loss": 0.3218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2213214933872223, + "step": 2405, + "valid_targets_mean": 3403.4, + "valid_targets_min": 819 + }, + { + "epoch": 3.856, + "grad_norm": 0.583928362005403, + "learning_rate": 6.064745126835112e-06, + "loss": 0.3025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16777969896793365, + "step": 2410, + "valid_targets_mean": 3357.1, + "valid_targets_min": 860 + }, + { + "epoch": 3.864, + "grad_norm": 0.6142754993162113, + "learning_rate": 5.984825348568812e-06, + "loss": 0.3007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19814524054527283, + "step": 2415, + "valid_targets_mean": 2603.5, + "valid_targets_min": 831 + }, + { + "epoch": 3.872, + "grad_norm": 0.4195895918414985, + "learning_rate": 5.905342897519262e-06, + "loss": 0.33, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12075480818748474, + "step": 2420, + "valid_targets_mean": 4224.9, + "valid_targets_min": 551 + }, + { + "epoch": 3.88, + "grad_norm": 0.6108959292753345, + "learning_rate": 5.826300253843851e-06, + "loss": 0.3366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15773876011371613, + "step": 2425, + "valid_targets_mean": 2816.1, + "valid_targets_min": 930 + }, + { + "epoch": 3.888, + "grad_norm": 0.5171359185903159, + "learning_rate": 5.7476998839763035e-06, + "loss": 0.2989, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13886040449142456, + "step": 2430, + "valid_targets_mean": 3425.9, + "valid_targets_min": 789 + }, + { + "epoch": 3.896, + "grad_norm": 0.4763178741386556, + "learning_rate": 5.669544240549698e-06, + "loss": 0.3051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16396956145763397, + "step": 2435, + "valid_targets_mean": 4035.2, + "valid_targets_min": 893 + }, + { + "epoch": 3.904, + "grad_norm": 0.4785442559038061, + "learning_rate": 5.591835762319946e-06, + "loss": 0.3111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11912888288497925, + "step": 2440, + "valid_targets_mean": 2858.9, + "valid_targets_min": 566 + }, + { + "epoch": 3.912, + "grad_norm": 0.6233825072887711, + "learning_rate": 5.514576874089683e-06, + "loss": 0.3413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1519429087638855, + "step": 2445, + "valid_targets_mean": 3033.6, + "valid_targets_min": 559 + }, + { + "epoch": 3.92, + "grad_norm": 0.5322851898274968, + "learning_rate": 5.437769986632622e-06, + "loss": 0.341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1855153739452362, + "step": 2450, + "valid_targets_mean": 4399.1, + "valid_targets_min": 1451 + }, + { + "epoch": 3.928, + "grad_norm": 0.44237959812688865, + "learning_rate": 5.361417496618315e-06, + "loss": 0.3117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18377932906150818, + "step": 2455, + "valid_targets_mean": 6480.9, + "valid_targets_min": 1407 + }, + { + "epoch": 3.936, + "grad_norm": 0.7143654129319893, + "learning_rate": 5.285521786537368e-06, + "loss": 0.3282, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15518513321876526, + "step": 2460, + "valid_targets_mean": 4235.6, + "valid_targets_min": 1211 + }, + { + "epoch": 3.944, + "grad_norm": 0.4769151407862085, + "learning_rate": 5.2100852246270975e-06, + "loss": 0.3146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21277496218681335, + "step": 2465, + "valid_targets_mean": 5719.1, + "valid_targets_min": 829 + }, + { + "epoch": 3.952, + "grad_norm": 0.6838406068253756, + "learning_rate": 5.135110164797637e-06, + "loss": 0.3305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3378908634185791, + "step": 2470, + "valid_targets_mean": 3928.5, + "valid_targets_min": 664 + }, + { + "epoch": 3.96, + "grad_norm": 0.6130883409393146, + "learning_rate": 5.060598946558484e-06, + "loss": 0.3349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13703122735023499, + "step": 2475, + "valid_targets_mean": 2553.0, + "valid_targets_min": 834 + }, + { + "epoch": 3.968, + "grad_norm": 0.4357023208751496, + "learning_rate": 4.986553894945512e-06, + "loss": 0.2976, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16759523749351501, + "step": 2480, + "valid_targets_mean": 4775.6, + "valid_targets_min": 736 + }, + { + "epoch": 3.976, + "grad_norm": 0.638026763808748, + "learning_rate": 4.912977320448391e-06, + "loss": 0.3393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2706812620162964, + "step": 2485, + "valid_targets_mean": 4390.4, + "valid_targets_min": 1011 + }, + { + "epoch": 3.984, + "grad_norm": 0.6395434082611862, + "learning_rate": 4.839871518938513e-06, + "loss": 0.354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18830779194831848, + "step": 2490, + "valid_targets_mean": 3367.8, + "valid_targets_min": 1225 + }, + { + "epoch": 3.992, + "grad_norm": 0.6045521377506226, + "learning_rate": 4.767238771597347e-06, + "loss": 0.3432, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.167062908411026, + "step": 2495, + "valid_targets_mean": 2106.9, + "valid_targets_min": 555 + }, + { + "epoch": 4.0, + "grad_norm": 0.5970487451114057, + "learning_rate": 4.695081344845254e-06, + "loss": 0.3183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15452514588832855, + "step": 2500, + "valid_targets_mean": 3056.4, + "valid_targets_min": 506 + }, + { + "epoch": 4.008, + "grad_norm": 0.4416015542929956, + "learning_rate": 4.623401490270778e-06, + "loss": 0.3266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1425788402557373, + "step": 2505, + "valid_targets_mean": 4244.4, + "valid_targets_min": 1097 + }, + { + "epoch": 4.016, + "grad_norm": 0.6050783805568698, + "learning_rate": 4.552201444560373e-06, + "loss": 0.3079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13076642155647278, + "step": 2510, + "valid_targets_mean": 3324.1, + "valid_targets_min": 1050 + }, + { + "epoch": 4.024, + "grad_norm": 0.7064956382591235, + "learning_rate": 4.481483429428615e-06, + "loss": 0.3301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13634774088859558, + "step": 2515, + "valid_targets_mean": 2014.1, + "valid_targets_min": 940 + }, + { + "epoch": 4.032, + "grad_norm": 0.49373001353281315, + "learning_rate": 4.4112496515488765e-06, + "loss": 0.2945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15392452478408813, + "step": 2520, + "valid_targets_mean": 3495.6, + "valid_targets_min": 467 + }, + { + "epoch": 4.04, + "grad_norm": 0.5279529664688594, + "learning_rate": 4.341502302484472e-06, + "loss": 0.3109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19533191621303558, + "step": 2525, + "valid_targets_mean": 4519.1, + "valid_targets_min": 1176 + }, + { + "epoch": 4.048, + "grad_norm": 0.5002800878652447, + "learning_rate": 4.272243558620264e-06, + "loss": 0.2897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14053988456726074, + "step": 2530, + "valid_targets_mean": 3120.2, + "valid_targets_min": 1102 + }, + { + "epoch": 4.056, + "grad_norm": 0.6522314422022066, + "learning_rate": 4.203475581094771e-06, + "loss": 0.3082, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20415924489498138, + "step": 2535, + "valid_targets_mean": 3365.0, + "valid_targets_min": 1171 + }, + { + "epoch": 4.064, + "grad_norm": 0.3871114184970602, + "learning_rate": 4.135200515732716e-06, + "loss": 0.3235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13306647539138794, + "step": 2540, + "valid_targets_mean": 6466.8, + "valid_targets_min": 1014 + }, + { + "epoch": 4.072, + "grad_norm": 0.5275110629270116, + "learning_rate": 4.067420492978065e-06, + "loss": 0.3177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19687163829803467, + "step": 2545, + "valid_targets_mean": 4931.0, + "valid_targets_min": 1092 + }, + { + "epoch": 4.08, + "grad_norm": 0.6130234035544103, + "learning_rate": 4.000137627827554e-06, + "loss": 0.3209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16655772924423218, + "step": 2550, + "valid_targets_mean": 2530.0, + "valid_targets_min": 1073 + }, + { + "epoch": 4.088, + "grad_norm": 0.38814066490279997, + "learning_rate": 3.9333540197647035e-06, + "loss": 0.2997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16146323084831238, + "step": 2555, + "valid_targets_mean": 7966.8, + "valid_targets_min": 1103 + }, + { + "epoch": 4.096, + "grad_norm": 0.4590471476508371, + "learning_rate": 3.867071752694282e-06, + "loss": 0.304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1653611660003662, + "step": 2560, + "valid_targets_mean": 4412.4, + "valid_targets_min": 602 + }, + { + "epoch": 4.104, + "grad_norm": 0.5272846889528702, + "learning_rate": 3.8012928948773243e-06, + "loss": 0.3173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18114957213401794, + "step": 2565, + "valid_targets_mean": 4442.1, + "valid_targets_min": 1003 + }, + { + "epoch": 4.112, + "grad_norm": 0.5186772440314499, + "learning_rate": 3.7360194988665364e-06, + "loss": 0.2818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13564951717853546, + "step": 2570, + "valid_targets_mean": 3933.4, + "valid_targets_min": 1004 + }, + { + "epoch": 4.12, + "grad_norm": 0.4328083139216796, + "learning_rate": 3.6712536014422885e-06, + "loss": 0.3033, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12977717816829681, + "step": 2575, + "valid_targets_mean": 5225.9, + "valid_targets_min": 871 + }, + { + "epoch": 4.128, + "grad_norm": 0.6420338275118762, + "learning_rate": 3.606997223549049e-06, + "loss": 0.3142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20889759063720703, + "step": 2580, + "valid_targets_mean": 3776.1, + "valid_targets_min": 844 + }, + { + "epoch": 4.136, + "grad_norm": 0.5245741332938453, + "learning_rate": 3.543252370232313e-06, + "loss": 0.3056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1626463085412979, + "step": 2585, + "valid_targets_mean": 4243.8, + "valid_targets_min": 1076 + }, + { + "epoch": 4.144, + "grad_norm": 0.6009260362686207, + "learning_rate": 3.4800210305760662e-06, + "loss": 0.3222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20430579781532288, + "step": 2590, + "valid_targets_mean": 4037.8, + "valid_targets_min": 872 + }, + { + "epoch": 4.152, + "grad_norm": 0.6474293755424925, + "learning_rate": 3.4173051776406817e-06, + "loss": 0.334, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17541387677192688, + "step": 2595, + "valid_targets_mean": 3601.9, + "valid_targets_min": 709 + }, + { + "epoch": 4.16, + "grad_norm": 0.5299682943737077, + "learning_rate": 3.3551067684013706e-06, + "loss": 0.3194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17366911470890045, + "step": 2600, + "valid_targets_mean": 4566.0, + "valid_targets_min": 797 + }, + { + "epoch": 4.168, + "grad_norm": 0.5343201098430208, + "learning_rate": 3.2934277436871187e-06, + "loss": 0.3082, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18967169523239136, + "step": 2605, + "valid_targets_mean": 5175.8, + "valid_targets_min": 803 + }, + { + "epoch": 4.176, + "grad_norm": 0.706532685786007, + "learning_rate": 3.232270028120121e-06, + "loss": 0.3535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22153635323047638, + "step": 2610, + "valid_targets_mean": 3667.5, + "valid_targets_min": 1042 + }, + { + "epoch": 4.184, + "grad_norm": 0.6583861635037094, + "learning_rate": 3.1716355300557256e-06, + "loss": 0.3241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21890048682689667, + "step": 2615, + "valid_targets_mean": 3347.5, + "valid_targets_min": 993 + }, + { + "epoch": 4.192, + "grad_norm": 0.4972479392885182, + "learning_rate": 3.111526141522896e-06, + "loss": 0.2942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23314082622528076, + "step": 2620, + "valid_targets_mean": 5941.9, + "valid_targets_min": 960 + }, + { + "epoch": 4.2, + "grad_norm": 0.42587188064481973, + "learning_rate": 3.0519437381651507e-06, + "loss": 0.3007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13505974411964417, + "step": 2625, + "valid_targets_mean": 4900.6, + "valid_targets_min": 1129 + }, + { + "epoch": 4.208, + "grad_norm": 0.568223927683394, + "learning_rate": 2.992890179182062e-06, + "loss": 0.3146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19689098000526428, + "step": 2630, + "valid_targets_mean": 4559.5, + "valid_targets_min": 930 + }, + { + "epoch": 4.216, + "grad_norm": 0.3833405874743467, + "learning_rate": 2.93436730727122e-06, + "loss": 0.3283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21802669763565063, + "step": 2635, + "valid_targets_mean": 13101.0, + "valid_targets_min": 704 + }, + { + "epoch": 4.224, + "grad_norm": 0.4360867692750178, + "learning_rate": 2.8763769485707447e-06, + "loss": 0.3004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1010446846485138, + "step": 2640, + "valid_targets_mean": 3420.9, + "valid_targets_min": 694 + }, + { + "epoch": 4.232, + "grad_norm": 0.5394563271831259, + "learning_rate": 2.818920912602294e-06, + "loss": 0.3053, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08956138789653778, + "step": 2645, + "valid_targets_mean": 2421.5, + "valid_targets_min": 799 + }, + { + "epoch": 4.24, + "grad_norm": 0.5364665203536897, + "learning_rate": 2.762000992214626e-06, + "loss": 0.3032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13476410508155823, + "step": 2650, + "valid_targets_mean": 4055.1, + "valid_targets_min": 751 + }, + { + "epoch": 4.248, + "grad_norm": 0.6310235572259564, + "learning_rate": 2.7056189635276162e-06, + "loss": 0.3064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2394050657749176, + "step": 2655, + "valid_targets_mean": 4607.8, + "valid_targets_min": 691 + }, + { + "epoch": 4.256, + "grad_norm": 0.5141156610492511, + "learning_rate": 2.6497765858768643e-06, + "loss": 0.3108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19690141081809998, + "step": 2660, + "valid_targets_mean": 4163.4, + "valid_targets_min": 1429 + }, + { + "epoch": 4.264, + "grad_norm": 0.60664896327067, + "learning_rate": 2.594475601758786e-06, + "loss": 0.3045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21876487135887146, + "step": 2665, + "valid_targets_mean": 3978.0, + "valid_targets_min": 1162 + }, + { + "epoch": 4.272, + "grad_norm": 0.586279917157946, + "learning_rate": 2.539717736776237e-06, + "loss": 0.3262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15984392166137695, + "step": 2670, + "valid_targets_mean": 3679.0, + "valid_targets_min": 819 + }, + { + "epoch": 4.28, + "grad_norm": 0.5679469104180861, + "learning_rate": 2.4855046995846844e-06, + "loss": 0.2905, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1231120154261589, + "step": 2675, + "valid_targets_mean": 2242.6, + "valid_targets_min": 596 + }, + { + "epoch": 4.288, + "grad_norm": 0.5442418621257851, + "learning_rate": 2.431838181838868e-06, + "loss": 0.3294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15020456910133362, + "step": 2680, + "valid_targets_mean": 4034.4, + "valid_targets_min": 746 + }, + { + "epoch": 4.296, + "grad_norm": 0.6563588104632252, + "learning_rate": 2.3787198581400285e-06, + "loss": 0.3679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19800585508346558, + "step": 2685, + "valid_targets_mean": 3281.4, + "valid_targets_min": 716 + }, + { + "epoch": 4.304, + "grad_norm": 0.6587761603032072, + "learning_rate": 2.3261513859836437e-06, + "loss": 0.3175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2168753743171692, + "step": 2690, + "valid_targets_mean": 3552.4, + "valid_targets_min": 1376 + }, + { + "epoch": 4.312, + "grad_norm": 0.5427138569442952, + "learning_rate": 2.27413440570772e-06, + "loss": 0.2975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13280799984931946, + "step": 2695, + "valid_targets_mean": 2905.9, + "valid_targets_min": 831 + }, + { + "epoch": 4.32, + "grad_norm": 0.4228524942163614, + "learning_rate": 2.222670540441596e-06, + "loss": 0.3268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11188645660877228, + "step": 2700, + "valid_targets_mean": 3543.2, + "valid_targets_min": 1280 + }, + { + "epoch": 4.328, + "grad_norm": 0.4899573286523899, + "learning_rate": 2.17176139605531e-06, + "loss": 0.3425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2020570933818817, + "step": 2705, + "valid_targets_mean": 5368.1, + "valid_targets_min": 704 + }, + { + "epoch": 4.336, + "grad_norm": 0.507636144967628, + "learning_rate": 2.121408561109466e-06, + "loss": 0.3147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18471331894397736, + "step": 2710, + "valid_targets_mean": 5824.5, + "valid_targets_min": 645 + }, + { + "epoch": 4.344, + "grad_norm": 0.3901707775687487, + "learning_rate": 2.071613606805696e-06, + "loss": 0.2986, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08599360287189484, + "step": 2715, + "valid_targets_mean": 3438.6, + "valid_targets_min": 700 + }, + { + "epoch": 4.352, + "grad_norm": 0.5742155815605969, + "learning_rate": 2.0223780869376018e-06, + "loss": 0.3148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15944108366966248, + "step": 2720, + "valid_targets_mean": 3927.5, + "valid_targets_min": 1011 + }, + { + "epoch": 4.36, + "grad_norm": 0.419179061441119, + "learning_rate": 1.9737035378422907e-06, + "loss": 0.334, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14531302452087402, + "step": 2725, + "valid_targets_mean": 5146.2, + "valid_targets_min": 1062 + }, + { + "epoch": 4.368, + "grad_norm": 0.5893213103013761, + "learning_rate": 1.925591478352424e-06, + "loss": 0.3056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16566333174705505, + "step": 2730, + "valid_targets_mean": 4073.5, + "valid_targets_min": 786 + }, + { + "epoch": 4.376, + "grad_norm": 0.5083568367696376, + "learning_rate": 1.8780434097488443e-06, + "loss": 0.3219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11135862022638321, + "step": 2735, + "valid_targets_mean": 2977.8, + "valid_targets_min": 740 + }, + { + "epoch": 4.384, + "grad_norm": 0.5970456690160377, + "learning_rate": 1.831060815713699e-06, + "loss": 0.3217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15850530564785004, + "step": 2740, + "valid_targets_mean": 3731.6, + "valid_targets_min": 738 + }, + { + "epoch": 4.392, + "grad_norm": 0.4352437971339595, + "learning_rate": 1.7846451622841643e-06, + "loss": 0.3126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16310745477676392, + "step": 2745, + "valid_targets_mean": 4859.1, + "valid_targets_min": 763 + }, + { + "epoch": 4.4, + "grad_norm": 0.7220488054568065, + "learning_rate": 1.7387978978066988e-06, + "loss": 0.3273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12236481159925461, + "step": 2750, + "valid_targets_mean": 2560.9, + "valid_targets_min": 833 + }, + { + "epoch": 4.408, + "grad_norm": 0.4482430441589336, + "learning_rate": 1.6935204528918347e-06, + "loss": 0.3229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15221011638641357, + "step": 2755, + "valid_targets_mean": 4397.8, + "valid_targets_min": 879 + }, + { + "epoch": 4.416, + "grad_norm": 0.6616631272031211, + "learning_rate": 1.6488142403695651e-06, + "loss": 0.3323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2810991704463959, + "step": 2760, + "valid_targets_mean": 4374.9, + "valid_targets_min": 634 + }, + { + "epoch": 4.424, + "grad_norm": 0.6231637576743879, + "learning_rate": 1.6046806552452254e-06, + "loss": 0.3248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1512073576450348, + "step": 2765, + "valid_targets_mean": 2162.0, + "valid_targets_min": 717 + }, + { + "epoch": 4.432, + "grad_norm": 0.546121704100876, + "learning_rate": 1.5611210746559868e-06, + "loss": 0.3282, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1441936194896698, + "step": 2770, + "valid_targets_mean": 3542.0, + "valid_targets_min": 916 + }, + { + "epoch": 4.44, + "grad_norm": 0.539850864305549, + "learning_rate": 1.5181368578278744e-06, + "loss": 0.3084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14854803681373596, + "step": 2775, + "valid_targets_mean": 3258.1, + "valid_targets_min": 1298 + }, + { + "epoch": 4.448, + "grad_norm": 0.5230541376311952, + "learning_rate": 1.4757293460333566e-06, + "loss": 0.3112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14559073746204376, + "step": 2780, + "valid_targets_mean": 3888.8, + "valid_targets_min": 896 + }, + { + "epoch": 4.456, + "grad_norm": 0.49941833844922867, + "learning_rate": 1.4338998625494905e-06, + "loss": 0.3233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15301160514354706, + "step": 2785, + "valid_targets_mean": 4101.0, + "valid_targets_min": 871 + }, + { + "epoch": 4.464, + "grad_norm": 0.5879287108121554, + "learning_rate": 1.3926497126166405e-06, + "loss": 0.3202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16979511082172394, + "step": 2790, + "valid_targets_mean": 4361.5, + "valid_targets_min": 843 + }, + { + "epoch": 4.4719999999999995, + "grad_norm": 0.491233445234694, + "learning_rate": 1.3519801833977298e-06, + "loss": 0.3089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16678255796432495, + "step": 2795, + "valid_targets_mean": 4108.4, + "valid_targets_min": 957 + }, + { + "epoch": 4.48, + "grad_norm": 0.4766886110365394, + "learning_rate": 1.3118925439381003e-06, + "loss": 0.2904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12528201937675476, + "step": 2800, + "valid_targets_mean": 3777.0, + "valid_targets_min": 1264 + }, + { + "epoch": 4.4879999999999995, + "grad_norm": 0.47462065492059774, + "learning_rate": 1.2723880451258918e-06, + "loss": 0.3051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17402642965316772, + "step": 2805, + "valid_targets_mean": 6122.8, + "valid_targets_min": 3206 + }, + { + "epoch": 4.496, + "grad_norm": 0.4937259588561042, + "learning_rate": 1.2334679196530219e-06, + "loss": 0.3576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16844210028648376, + "step": 2810, + "valid_targets_mean": 5449.5, + "valid_targets_min": 887 + }, + { + "epoch": 4.504, + "grad_norm": 0.5195655183538852, + "learning_rate": 1.1951333819767163e-06, + "loss": 0.3071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1690118908882141, + "step": 2815, + "valid_targets_mean": 4369.6, + "valid_targets_min": 721 + }, + { + "epoch": 4.5120000000000005, + "grad_norm": 0.645008699848017, + "learning_rate": 1.157385628281622e-06, + "loss": 0.3122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16069874167442322, + "step": 2820, + "valid_targets_mean": 2885.9, + "valid_targets_min": 889 + }, + { + "epoch": 4.52, + "grad_norm": 0.46142203647435265, + "learning_rate": 1.1202258364424633e-06, + "loss": 0.281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10793192684650421, + "step": 2825, + "valid_targets_mean": 4069.0, + "valid_targets_min": 757 + }, + { + "epoch": 4.5280000000000005, + "grad_norm": 0.5591910759629778, + "learning_rate": 1.0836551659873073e-06, + "loss": 0.312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1380782425403595, + "step": 2830, + "valid_targets_mean": 3617.9, + "valid_targets_min": 661 + }, + { + "epoch": 4.536, + "grad_norm": 0.5024384662282567, + "learning_rate": 1.0476747580613723e-06, + "loss": 0.3243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14395056664943695, + "step": 2835, + "valid_targets_mean": 4670.5, + "valid_targets_min": 895 + }, + { + "epoch": 4.5440000000000005, + "grad_norm": 0.4960916051732461, + "learning_rate": 1.012285735391416e-06, + "loss": 0.3108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1764678955078125, + "step": 2840, + "valid_targets_mean": 4985.9, + "valid_targets_min": 404 + }, + { + "epoch": 4.552, + "grad_norm": 0.5442146212646635, + "learning_rate": 9.774892022507166e-07, + "loss": 0.319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13719762861728668, + "step": 2845, + "valid_targets_mean": 3678.2, + "valid_targets_min": 1021 + }, + { + "epoch": 4.5600000000000005, + "grad_norm": 0.5874551803498684, + "learning_rate": 9.432862444245994e-07, + "loss": 0.3123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1982530653476715, + "step": 2850, + "valid_targets_mean": 3730.6, + "valid_targets_min": 800 + }, + { + "epoch": 4.568, + "grad_norm": 0.5392307118781249, + "learning_rate": 9.096779291765667e-07, + "loss": 0.3304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21671076118946075, + "step": 2855, + "valid_targets_mean": 5178.0, + "valid_targets_min": 1265 + }, + { + "epoch": 4.576, + "grad_norm": 0.6120562795608593, + "learning_rate": 8.766653052149831e-07, + "loss": 0.3354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20868843793869019, + "step": 2860, + "valid_targets_mean": 3738.2, + "valid_targets_min": 1599 + }, + { + "epoch": 4.584, + "grad_norm": 0.7117821394769693, + "learning_rate": 8.442494026603709e-07, + "loss": 0.3398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16895297169685364, + "step": 2865, + "valid_targets_mean": 2438.6, + "valid_targets_min": 943 + }, + { + "epoch": 4.592, + "grad_norm": 0.6622067516789324, + "learning_rate": 8.124312330132423e-07, + "loss": 0.3267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11896079778671265, + "step": 2870, + "valid_targets_mean": 2087.6, + "valid_targets_min": 868 + }, + { + "epoch": 4.6, + "grad_norm": 0.5844121637950128, + "learning_rate": 7.812117891225667e-07, + "loss": 0.2815, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15796557068824768, + "step": 2875, + "valid_targets_mean": 3266.6, + "valid_targets_min": 1136 + }, + { + "epoch": 4.608, + "grad_norm": 0.6396259105660784, + "learning_rate": 7.505920451547544e-07, + "loss": 0.2991, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.193174809217453, + "step": 2880, + "valid_targets_mean": 4270.2, + "valid_targets_min": 972 + }, + { + "epoch": 4.616, + "grad_norm": 0.45054830264551554, + "learning_rate": 7.205729565632947e-07, + "loss": 0.3064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11880641430616379, + "step": 2885, + "valid_targets_mean": 2952.6, + "valid_targets_min": 1052 + }, + { + "epoch": 4.624, + "grad_norm": 0.5867839344286957, + "learning_rate": 6.911554600589121e-07, + "loss": 0.3213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1453874111175537, + "step": 2890, + "valid_targets_mean": 3303.1, + "valid_targets_min": 944 + }, + { + "epoch": 4.632, + "grad_norm": 0.66861340671234, + "learning_rate": 6.62340473580354e-07, + "loss": 0.3191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14077623188495636, + "step": 2895, + "valid_targets_mean": 2540.4, + "valid_targets_min": 1033 + }, + { + "epoch": 4.64, + "grad_norm": 0.5948561509092989, + "learning_rate": 6.341288962657422e-07, + "loss": 0.3266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11293820291757584, + "step": 2900, + "valid_targets_mean": 2276.4, + "valid_targets_min": 1000 + }, + { + "epoch": 4.648, + "grad_norm": 0.4492558783161307, + "learning_rate": 6.06521608424524e-07, + "loss": 0.3436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11917877197265625, + "step": 2905, + "valid_targets_mean": 2935.6, + "valid_targets_min": 697 + }, + { + "epoch": 4.656, + "grad_norm": 0.5260049148715912, + "learning_rate": 5.795194715099905e-07, + "loss": 0.3103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11105750501155853, + "step": 2910, + "valid_targets_mean": 2840.0, + "valid_targets_min": 847 + }, + { + "epoch": 4.664, + "grad_norm": 0.6187219070281177, + "learning_rate": 5.531233280924042e-07, + "loss": 0.3064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18402305245399475, + "step": 2915, + "valid_targets_mean": 3376.0, + "valid_targets_min": 899 + }, + { + "epoch": 4.672, + "grad_norm": 0.6718889715232024, + "learning_rate": 5.273340018327044e-07, + "loss": 0.3303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19413933157920837, + "step": 2920, + "valid_targets_mean": 2698.1, + "valid_targets_min": 727 + }, + { + "epoch": 4.68, + "grad_norm": 0.6687495999234568, + "learning_rate": 5.02152297456806e-07, + "loss": 0.3323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19689956307411194, + "step": 2925, + "valid_targets_mean": 2559.8, + "valid_targets_min": 772 + }, + { + "epoch": 4.688, + "grad_norm": 0.5256698425754467, + "learning_rate": 4.775790007304993e-07, + "loss": 0.3013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24560466408729553, + "step": 2930, + "valid_targets_mean": 6107.5, + "valid_targets_min": 972 + }, + { + "epoch": 4.696, + "grad_norm": 0.5425974097527478, + "learning_rate": 4.5361487843490924e-07, + "loss": 0.293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14793461561203003, + "step": 2935, + "valid_targets_mean": 3443.0, + "valid_targets_min": 678 + }, + { + "epoch": 4.704, + "grad_norm": 0.4596549583922256, + "learning_rate": 4.3026067834258667e-07, + "loss": 0.299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16168315708637238, + "step": 2940, + "valid_targets_mean": 7192.1, + "valid_targets_min": 1895 + }, + { + "epoch": 4.712, + "grad_norm": 0.52608460848762, + "learning_rate": 4.0751712919417484e-07, + "loss": 0.3069, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20969341695308685, + "step": 2945, + "valid_targets_mean": 4731.2, + "valid_targets_min": 736 + }, + { + "epoch": 4.72, + "grad_norm": 0.5530590908411556, + "learning_rate": 3.853849406756549e-07, + "loss": 0.2896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1489768624305725, + "step": 2950, + "valid_targets_mean": 3561.1, + "valid_targets_min": 1047 + }, + { + "epoch": 4.728, + "grad_norm": 0.6253292073895866, + "learning_rate": 3.6386480339621886e-07, + "loss": 0.3121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15937970578670502, + "step": 2955, + "valid_targets_mean": 3324.0, + "valid_targets_min": 1209 + }, + { + "epoch": 4.736, + "grad_norm": 0.4383199488774124, + "learning_rate": 3.4295738886670925e-07, + "loss": 0.2963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12093428522348404, + "step": 2960, + "valid_targets_mean": 4741.6, + "valid_targets_min": 901 + }, + { + "epoch": 4.744, + "grad_norm": 0.5811119572995426, + "learning_rate": 3.226633494786668e-07, + "loss": 0.3017, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11559713631868362, + "step": 2965, + "valid_targets_mean": 4799.9, + "valid_targets_min": 775 + }, + { + "epoch": 4.752, + "grad_norm": 0.702215418406445, + "learning_rate": 3.0298331848398033e-07, + "loss": 0.3151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1337815821170807, + "step": 2970, + "valid_targets_mean": 2866.6, + "valid_targets_min": 865 + }, + { + "epoch": 4.76, + "grad_norm": 0.42111483300594843, + "learning_rate": 2.839179099751133e-07, + "loss": 0.3159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09750033915042877, + "step": 2975, + "valid_targets_mean": 3362.2, + "valid_targets_min": 909 + }, + { + "epoch": 4.768, + "grad_norm": 0.6679073549967152, + "learning_rate": 2.654677188659549e-07, + "loss": 0.2942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24244052171707153, + "step": 2980, + "valid_targets_mean": 4093.0, + "valid_targets_min": 734 + }, + { + "epoch": 4.776, + "grad_norm": 0.5819245393210193, + "learning_rate": 2.476333208732462e-07, + "loss": 0.3427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19281116127967834, + "step": 2985, + "valid_targets_mean": 3587.6, + "valid_targets_min": 808 + }, + { + "epoch": 4.784, + "grad_norm": 0.6047753681807744, + "learning_rate": 2.3041527249863193e-07, + "loss": 0.3198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18199463188648224, + "step": 2990, + "valid_targets_mean": 4387.0, + "valid_targets_min": 764 + }, + { + "epoch": 4.792, + "grad_norm": 0.6613784930971137, + "learning_rate": 2.1381411101127013e-07, + "loss": 0.2938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.132035493850708, + "step": 2995, + "valid_targets_mean": 2644.2, + "valid_targets_min": 687 + }, + { + "epoch": 4.8, + "grad_norm": 0.5682147375927875, + "learning_rate": 1.9783035443108999e-07, + "loss": 0.2967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1099662259221077, + "step": 3000, + "valid_targets_mean": 2433.6, + "valid_targets_min": 957 + }, + { + "epoch": 4.808, + "grad_norm": 0.4089149079258835, + "learning_rate": 1.8246450151261362e-07, + "loss": 0.3093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1526375263929367, + "step": 3005, + "valid_targets_mean": 7613.1, + "valid_targets_min": 791 + }, + { + "epoch": 4.816, + "grad_norm": 0.55810969874875, + "learning_rate": 1.6771703172940635e-07, + "loss": 0.3096, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1753598004579544, + "step": 3010, + "valid_targets_mean": 4165.6, + "valid_targets_min": 794 + }, + { + "epoch": 4.824, + "grad_norm": 0.45869761806907694, + "learning_rate": 1.5358840525909967e-07, + "loss": 0.3151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12356032431125641, + "step": 3015, + "valid_targets_mean": 4902.8, + "valid_targets_min": 934 + }, + { + "epoch": 4.832, + "grad_norm": 0.5650404498859692, + "learning_rate": 1.4007906296904072e-07, + "loss": 0.3171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13098034262657166, + "step": 3020, + "valid_targets_mean": 3383.9, + "valid_targets_min": 854 + }, + { + "epoch": 4.84, + "grad_norm": 0.5184302552337475, + "learning_rate": 1.2718942640254084e-07, + "loss": 0.3097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16631071269512177, + "step": 3025, + "valid_targets_mean": 3698.4, + "valid_targets_min": 968 + }, + { + "epoch": 4.848, + "grad_norm": 0.479932423772461, + "learning_rate": 1.1491989776570623e-07, + "loss": 0.3022, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15011048316955566, + "step": 3030, + "valid_targets_mean": 4314.4, + "valid_targets_min": 1056 + }, + { + "epoch": 4.856, + "grad_norm": 0.5789239779860111, + "learning_rate": 1.0327085991490127e-07, + "loss": 0.3391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11871884763240814, + "step": 3035, + "valid_targets_mean": 2391.0, + "valid_targets_min": 710 + }, + { + "epoch": 4.864, + "grad_norm": 0.5899999965939208, + "learning_rate": 9.22426763447981e-08, + "loss": 0.3145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13772061467170715, + "step": 3040, + "valid_targets_mean": 2684.2, + "valid_targets_min": 844 + }, + { + "epoch": 4.872, + "grad_norm": 0.5413636294872867, + "learning_rate": 8.183569117703461e-08, + "loss": 0.3173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1497182846069336, + "step": 3045, + "valid_targets_mean": 3171.4, + "valid_targets_min": 1127 + }, + { + "epoch": 4.88, + "grad_norm": 0.5102986675669349, + "learning_rate": 7.205022914946957e-08, + "loss": 0.3454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20103488862514496, + "step": 3050, + "valid_targets_mean": 5160.4, + "valid_targets_min": 936 + }, + { + "epoch": 4.888, + "grad_norm": 0.5687408463816219, + "learning_rate": 6.288659560606203e-08, + "loss": 0.3219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1383986473083496, + "step": 3055, + "valid_targets_mean": 3382.1, + "valid_targets_min": 1005 + }, + { + "epoch": 4.896, + "grad_norm": 0.4996884419003454, + "learning_rate": 5.4345076487332114e-08, + "loss": 0.3192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18319477140903473, + "step": 3060, + "valid_targets_mean": 4995.0, + "valid_targets_min": 1134 + }, + { + "epoch": 4.904, + "grad_norm": 0.465356601725948, + "learning_rate": 4.642593832144382e-08, + "loss": 0.2856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1306002140045166, + "step": 3065, + "valid_targets_mean": 4357.2, + "valid_targets_min": 667 + }, + { + "epoch": 4.912, + "grad_norm": 0.6879235684017253, + "learning_rate": 3.912942821589161e-08, + "loss": 0.2827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1553347408771515, + "step": 3070, + "valid_targets_mean": 5430.6, + "valid_targets_min": 2537 + }, + { + "epoch": 4.92, + "grad_norm": 0.6393549752658132, + "learning_rate": 3.2455773849779935e-08, + "loss": 0.3238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21189448237419128, + "step": 3075, + "valid_targets_mean": 3572.1, + "valid_targets_min": 1467 + }, + { + "epoch": 4.928, + "grad_norm": 0.4888826335889139, + "learning_rate": 2.6405183466731154e-08, + "loss": 0.3143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11773912608623505, + "step": 3080, + "valid_targets_mean": 3744.6, + "valid_targets_min": 1223 + }, + { + "epoch": 4.936, + "grad_norm": 0.5646847598387272, + "learning_rate": 2.0977845868375145e-08, + "loss": 0.2988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1856897473335266, + "step": 3085, + "valid_targets_mean": 3780.4, + "valid_targets_min": 613 + }, + { + "epoch": 4.944, + "grad_norm": 0.4228284310980491, + "learning_rate": 1.6173930408467376e-08, + "loss": 0.3228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14991110563278198, + "step": 3090, + "valid_targets_mean": 5997.0, + "valid_targets_min": 817 + }, + { + "epoch": 4.952, + "grad_norm": 0.46279632136180054, + "learning_rate": 1.199358698759978e-08, + "loss": 0.2987, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15612635016441345, + "step": 3095, + "valid_targets_mean": 5848.9, + "valid_targets_min": 800 + }, + { + "epoch": 4.96, + "grad_norm": 0.5214714566840865, + "learning_rate": 8.436946048522298e-09, + "loss": 0.3246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20194128155708313, + "step": 3100, + "valid_targets_mean": 4828.2, + "valid_targets_min": 3631 + }, + { + "epoch": 4.968, + "grad_norm": 0.47269884538929174, + "learning_rate": 5.504118572081662e-09, + "loss": 0.3072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1507304161787033, + "step": 3105, + "valid_targets_mean": 4550.4, + "valid_targets_min": 779 + }, + { + "epoch": 4.976, + "grad_norm": 0.48876143808029926, + "learning_rate": 3.1951960737419686e-09, + "loss": 0.305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2046237587928772, + "step": 3110, + "valid_targets_mean": 6304.9, + "valid_targets_min": 1809 + }, + { + "epoch": 4.984, + "grad_norm": 0.5173224903959538, + "learning_rate": 1.5102506007447227e-09, + "loss": 0.3073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11847519129514694, + "step": 3115, + "valid_targets_mean": 2805.1, + "valid_targets_min": 1730 + }, + { + "epoch": 4.992, + "grad_norm": 0.5730468668213601, + "learning_rate": 4.493347298528683e-10, + "loss": 0.3402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1377507597208023, + "step": 3120, + "valid_targets_mean": 3388.4, + "valid_targets_min": 1106 + }, + { + "epoch": 5.0, + "grad_norm": 0.5459769103106156, + "learning_rate": 1.248156571209691e-11, + "loss": 0.296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1909605711698532, + "step": 3125, + "valid_targets_mean": 3954.1, + "valid_targets_min": 1068 + }, + { + "epoch": 5.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1909605711698532, + "step": 3125, + "total_flos": 7.266001898207969e+17, + "train_loss": 0.3592725233459473, + "train_runtime": 30483.9243, + "train_samples_per_second": 1.64, + "train_steps_per_second": 0.103, + "valid_targets_mean": 3954.1, + "valid_targets_min": 1068 + } + ], + "logging_steps": 5, + "max_steps": 3125, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 7.266001898207969e+17, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}