{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 3125, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008, "grad_norm": 5.049601892729115, "learning_rate": 5.111821086261981e-07, "loss": 0.7278, "loss_nan_ranks": 0, "loss_rank_avg": 0.43948009610176086, "step": 5, "valid_targets_mean": 7209.1, "valid_targets_min": 1277 }, { "epoch": 0.016, "grad_norm": 5.277364451906386, "learning_rate": 1.1501597444089457e-06, "loss": 0.6908, "loss_nan_ranks": 0, "loss_rank_avg": 0.30664390325546265, "step": 10, "valid_targets_mean": 3595.1, "valid_targets_min": 964 }, { "epoch": 0.024, "grad_norm": 3.9698434226273944, "learning_rate": 1.7891373801916933e-06, "loss": 0.6463, "loss_nan_ranks": 0, "loss_rank_avg": 0.3173980712890625, "step": 15, "valid_targets_mean": 4277.0, "valid_targets_min": 1066 }, { "epoch": 0.032, "grad_norm": 4.147563403598727, "learning_rate": 2.428115015974441e-06, "loss": 0.6988, "loss_nan_ranks": 0, "loss_rank_avg": 0.3903578519821167, "step": 20, "valid_targets_mean": 4019.9, "valid_targets_min": 1172 }, { "epoch": 0.04, "grad_norm": 2.579151645965659, "learning_rate": 3.0670926517571885e-06, "loss": 0.653, "loss_nan_ranks": 0, "loss_rank_avg": 0.23339255154132843, "step": 25, "valid_targets_mean": 2710.8, "valid_targets_min": 854 }, { "epoch": 0.048, "grad_norm": 1.6363825890362744, "learning_rate": 3.7060702875399364e-06, "loss": 0.6401, "loss_nan_ranks": 0, "loss_rank_avg": 0.2710292339324951, "step": 30, "valid_targets_mean": 4470.5, "valid_targets_min": 1069 }, { "epoch": 0.056, "grad_norm": 1.093070288309869, "learning_rate": 4.345047923322684e-06, "loss": 0.5552, "loss_nan_ranks": 0, "loss_rank_avg": 0.2130659818649292, "step": 35, "valid_targets_mean": 2896.5, "valid_targets_min": 757 }, { "epoch": 0.064, "grad_norm": 0.8596561845629843, "learning_rate": 4.984025559105431e-06, "loss": 0.5892, "loss_nan_ranks": 0, "loss_rank_avg": 0.3451421856880188, "step": 40, "valid_targets_mean": 6655.2, "valid_targets_min": 1658 }, { "epoch": 0.072, "grad_norm": 0.720981615728207, "learning_rate": 5.623003194888179e-06, "loss": 0.5639, "loss_nan_ranks": 0, "loss_rank_avg": 0.23500263690948486, "step": 45, "valid_targets_mean": 4629.5, "valid_targets_min": 1128 }, { "epoch": 0.08, "grad_norm": 0.7648020352559365, "learning_rate": 6.261980830670928e-06, "loss": 0.546, "loss_nan_ranks": 0, "loss_rank_avg": 0.28623056411743164, "step": 50, "valid_targets_mean": 4455.5, "valid_targets_min": 557 }, { "epoch": 0.088, "grad_norm": 0.8070597567729144, "learning_rate": 6.900958466453675e-06, "loss": 0.5496, "loss_nan_ranks": 0, "loss_rank_avg": 0.21983392536640167, "step": 55, "valid_targets_mean": 2054.8, "valid_targets_min": 867 }, { "epoch": 0.096, "grad_norm": 0.5770805999001849, "learning_rate": 7.5399361022364225e-06, "loss": 0.5428, "loss_nan_ranks": 0, "loss_rank_avg": 0.2292506992816925, "step": 60, "valid_targets_mean": 4125.5, "valid_targets_min": 699 }, { "epoch": 0.104, "grad_norm": 0.5971913839474638, "learning_rate": 8.17891373801917e-06, "loss": 0.4938, "loss_nan_ranks": 0, "loss_rank_avg": 0.20313440263271332, "step": 65, "valid_targets_mean": 3079.9, "valid_targets_min": 756 }, { "epoch": 0.112, "grad_norm": 0.686250956100844, "learning_rate": 8.817891373801917e-06, "loss": 0.5128, "loss_nan_ranks": 0, "loss_rank_avg": 0.2895665466785431, "step": 70, "valid_targets_mean": 2778.6, "valid_targets_min": 667 }, { "epoch": 0.12, "grad_norm": 0.6575289237390317, "learning_rate": 9.456869009584665e-06, "loss": 0.5072, "loss_nan_ranks": 0, "loss_rank_avg": 0.24841192364692688, "step": 75, "valid_targets_mean": 3479.6, "valid_targets_min": 661 }, { "epoch": 0.128, "grad_norm": 0.6049593114695113, "learning_rate": 1.0095846645367413e-05, "loss": 0.4851, "loss_nan_ranks": 0, "loss_rank_avg": 0.2605184018611908, "step": 80, "valid_targets_mean": 3057.5, "valid_targets_min": 1042 }, { "epoch": 0.136, "grad_norm": 0.594174930790963, "learning_rate": 1.073482428115016e-05, "loss": 0.475, "loss_nan_ranks": 0, "loss_rank_avg": 0.12189821153879166, "step": 85, "valid_targets_mean": 2004.9, "valid_targets_min": 849 }, { "epoch": 0.144, "grad_norm": 0.6352082822362639, "learning_rate": 1.1373801916932907e-05, "loss": 0.4988, "loss_nan_ranks": 0, "loss_rank_avg": 0.3049646317958832, "step": 90, "valid_targets_mean": 3373.8, "valid_targets_min": 1120 }, { "epoch": 0.152, "grad_norm": 0.6122762651129019, "learning_rate": 1.2012779552715656e-05, "loss": 0.5111, "loss_nan_ranks": 0, "loss_rank_avg": 0.32920581102371216, "step": 95, "valid_targets_mean": 3691.1, "valid_targets_min": 840 }, { "epoch": 0.16, "grad_norm": 0.4919427231068009, "learning_rate": 1.2651757188498404e-05, "loss": 0.4916, "loss_nan_ranks": 0, "loss_rank_avg": 0.16588306427001953, "step": 100, "valid_targets_mean": 4110.8, "valid_targets_min": 665 }, { "epoch": 0.168, "grad_norm": 0.567123357639076, "learning_rate": 1.329073482428115e-05, "loss": 0.5458, "loss_nan_ranks": 0, "loss_rank_avg": 0.2980887293815613, "step": 105, "valid_targets_mean": 4432.4, "valid_targets_min": 716 }, { "epoch": 0.176, "grad_norm": 0.5563571872329078, "learning_rate": 1.39297124600639e-05, "loss": 0.4754, "loss_nan_ranks": 0, "loss_rank_avg": 0.30753272771835327, "step": 110, "valid_targets_mean": 5548.8, "valid_targets_min": 1114 }, { "epoch": 0.184, "grad_norm": 0.5033412953896856, "learning_rate": 1.4568690095846648e-05, "loss": 0.4713, "loss_nan_ranks": 0, "loss_rank_avg": 0.19983811676502228, "step": 115, "valid_targets_mean": 3808.8, "valid_targets_min": 1071 }, { "epoch": 0.192, "grad_norm": 0.5542569365948462, "learning_rate": 1.5207667731629394e-05, "loss": 0.4201, "loss_nan_ranks": 0, "loss_rank_avg": 0.1442127823829651, "step": 120, "valid_targets_mean": 1944.8, "valid_targets_min": 727 }, { "epoch": 0.2, "grad_norm": 0.5369920791073214, "learning_rate": 1.584664536741214e-05, "loss": 0.4804, "loss_nan_ranks": 0, "loss_rank_avg": 0.21610140800476074, "step": 125, "valid_targets_mean": 4456.5, "valid_targets_min": 938 }, { "epoch": 0.208, "grad_norm": 0.5934402873809017, "learning_rate": 1.648562300319489e-05, "loss": 0.4488, "loss_nan_ranks": 0, "loss_rank_avg": 0.21678262948989868, "step": 130, "valid_targets_mean": 2704.4, "valid_targets_min": 1028 }, { "epoch": 0.216, "grad_norm": 0.6167459028653418, "learning_rate": 1.712460063897764e-05, "loss": 0.4635, "loss_nan_ranks": 0, "loss_rank_avg": 0.26390573382377625, "step": 135, "valid_targets_mean": 3041.1, "valid_targets_min": 690 }, { "epoch": 0.224, "grad_norm": 0.6057571170803768, "learning_rate": 1.7763578274760385e-05, "loss": 0.473, "loss_nan_ranks": 0, "loss_rank_avg": 0.2703923285007477, "step": 140, "valid_targets_mean": 3405.5, "valid_targets_min": 1388 }, { "epoch": 0.232, "grad_norm": 0.5964087448191424, "learning_rate": 1.840255591054313e-05, "loss": 0.4281, "loss_nan_ranks": 0, "loss_rank_avg": 0.12820664048194885, "step": 145, "valid_targets_mean": 2341.5, "valid_targets_min": 548 }, { "epoch": 0.24, "grad_norm": 0.7944123967649483, "learning_rate": 1.904153354632588e-05, "loss": 0.4823, "loss_nan_ranks": 0, "loss_rank_avg": 0.3033648133277893, "step": 150, "valid_targets_mean": 2520.9, "valid_targets_min": 945 }, { "epoch": 0.248, "grad_norm": 0.6692216622848669, "learning_rate": 1.9680511182108627e-05, "loss": 0.4764, "loss_nan_ranks": 0, "loss_rank_avg": 0.352511465549469, "step": 155, "valid_targets_mean": 3567.8, "valid_targets_min": 1011 }, { "epoch": 0.256, "grad_norm": 0.5976706030380227, "learning_rate": 2.0319488817891376e-05, "loss": 0.4391, "loss_nan_ranks": 0, "loss_rank_avg": 0.25001269578933716, "step": 160, "valid_targets_mean": 3181.8, "valid_targets_min": 1389 }, { "epoch": 0.264, "grad_norm": 1.0239772828743636, "learning_rate": 2.0958466453674126e-05, "loss": 0.446, "loss_nan_ranks": 0, "loss_rank_avg": 0.16786524653434753, "step": 165, "valid_targets_mean": 3835.6, "valid_targets_min": 1012 }, { "epoch": 0.272, "grad_norm": 0.42177417682653895, "learning_rate": 2.1597444089456872e-05, "loss": 0.4304, "loss_nan_ranks": 0, "loss_rank_avg": 0.19490358233451843, "step": 170, "valid_targets_mean": 6332.6, "valid_targets_min": 948 }, { "epoch": 0.28, "grad_norm": 0.5895166731709086, "learning_rate": 2.2236421725239618e-05, "loss": 0.465, "loss_nan_ranks": 0, "loss_rank_avg": 0.22322815656661987, "step": 175, "valid_targets_mean": 3828.6, "valid_targets_min": 947 }, { "epoch": 0.288, "grad_norm": 0.6030927972832155, "learning_rate": 2.2875399361022364e-05, "loss": 0.3898, "loss_nan_ranks": 0, "loss_rank_avg": 0.1875823736190796, "step": 180, "valid_targets_mean": 3343.1, "valid_targets_min": 790 }, { "epoch": 0.296, "grad_norm": 0.45033732473663274, "learning_rate": 2.3514376996805114e-05, "loss": 0.4055, "loss_nan_ranks": 0, "loss_rank_avg": 0.16341853141784668, "step": 185, "valid_targets_mean": 4346.8, "valid_targets_min": 621 }, { "epoch": 0.304, "grad_norm": 0.6410357135895351, "learning_rate": 2.415335463258786e-05, "loss": 0.451, "loss_nan_ranks": 0, "loss_rank_avg": 0.2382049858570099, "step": 190, "valid_targets_mean": 2598.9, "valid_targets_min": 869 }, { "epoch": 0.312, "grad_norm": 0.40862470776459403, "learning_rate": 2.4792332268370606e-05, "loss": 0.4398, "loss_nan_ranks": 0, "loss_rank_avg": 0.19033300876617432, "step": 195, "valid_targets_mean": 8101.2, "valid_targets_min": 1425 }, { "epoch": 0.32, "grad_norm": 0.4639247345136467, "learning_rate": 2.543130990415336e-05, "loss": 0.4575, "loss_nan_ranks": 0, "loss_rank_avg": 0.1774352490901947, "step": 200, "valid_targets_mean": 3866.1, "valid_targets_min": 1002 }, { "epoch": 0.328, "grad_norm": 0.6045432758950621, "learning_rate": 2.6070287539936105e-05, "loss": 0.4629, "loss_nan_ranks": 0, "loss_rank_avg": 0.20918956398963928, "step": 205, "valid_targets_mean": 4064.6, "valid_targets_min": 907 }, { "epoch": 0.336, "grad_norm": 0.49705632653844295, "learning_rate": 2.670926517571885e-05, "loss": 0.4128, "loss_nan_ranks": 0, "loss_rank_avg": 0.20427216589450836, "step": 210, "valid_targets_mean": 5069.9, "valid_targets_min": 701 }, { "epoch": 0.344, "grad_norm": 0.48520489500855396, "learning_rate": 2.73482428115016e-05, "loss": 0.4252, "loss_nan_ranks": 0, "loss_rank_avg": 0.34422576427459717, "step": 215, "valid_targets_mean": 7910.2, "valid_targets_min": 1506 }, { "epoch": 0.352, "grad_norm": 0.4668215278946658, "learning_rate": 2.7987220447284347e-05, "loss": 0.4237, "loss_nan_ranks": 0, "loss_rank_avg": 0.21643120050430298, "step": 220, "valid_targets_mean": 5393.4, "valid_targets_min": 737 }, { "epoch": 0.36, "grad_norm": 0.5943792588027108, "learning_rate": 2.8626198083067093e-05, "loss": 0.4247, "loss_nan_ranks": 0, "loss_rank_avg": 0.13178721070289612, "step": 225, "valid_targets_mean": 2101.8, "valid_targets_min": 858 }, { "epoch": 0.368, "grad_norm": 0.5517991295062519, "learning_rate": 2.9265175718849843e-05, "loss": 0.3864, "loss_nan_ranks": 0, "loss_rank_avg": 0.14202260971069336, "step": 230, "valid_targets_mean": 2987.0, "valid_targets_min": 814 }, { "epoch": 0.376, "grad_norm": 0.6080233680136686, "learning_rate": 2.9904153354632592e-05, "loss": 0.4359, "loss_nan_ranks": 0, "loss_rank_avg": 0.2212154120206833, "step": 235, "valid_targets_mean": 3522.0, "valid_targets_min": 1157 }, { "epoch": 0.384, "grad_norm": 0.7001104625755565, "learning_rate": 3.054313099041534e-05, "loss": 0.4193, "loss_nan_ranks": 0, "loss_rank_avg": 0.32441866397857666, "step": 240, "valid_targets_mean": 3398.5, "valid_targets_min": 700 }, { "epoch": 0.392, "grad_norm": 0.5240312255698709, "learning_rate": 3.1182108626198084e-05, "loss": 0.4328, "loss_nan_ranks": 0, "loss_rank_avg": 0.23227915167808533, "step": 245, "valid_targets_mean": 4712.2, "valid_targets_min": 666 }, { "epoch": 0.4, "grad_norm": 0.8648496001859342, "learning_rate": 3.1821086261980834e-05, "loss": 0.4262, "loss_nan_ranks": 0, "loss_rank_avg": 0.19384440779685974, "step": 250, "valid_targets_mean": 1589.8, "valid_targets_min": 634 }, { "epoch": 0.408, "grad_norm": 0.5041946092525469, "learning_rate": 3.246006389776358e-05, "loss": 0.3766, "loss_nan_ranks": 0, "loss_rank_avg": 0.23754949867725372, "step": 255, "valid_targets_mean": 5545.6, "valid_targets_min": 697 }, { "epoch": 0.416, "grad_norm": 0.741812736773093, "learning_rate": 3.3099041533546326e-05, "loss": 0.4342, "loss_nan_ranks": 0, "loss_rank_avg": 0.19546037912368774, "step": 260, "valid_targets_mean": 1842.6, "valid_targets_min": 753 }, { "epoch": 0.424, "grad_norm": 0.6249635080823627, "learning_rate": 3.3738019169329076e-05, "loss": 0.3958, "loss_nan_ranks": 0, "loss_rank_avg": 0.3015894591808319, "step": 265, "valid_targets_mean": 3354.1, "valid_targets_min": 688 }, { "epoch": 0.432, "grad_norm": 0.5157995715206686, "learning_rate": 3.4376996805111825e-05, "loss": 0.4649, "loss_nan_ranks": 0, "loss_rank_avg": 0.23236235976219177, "step": 270, "valid_targets_mean": 4596.9, "valid_targets_min": 1140 }, { "epoch": 0.44, "grad_norm": 0.6481359309360338, "learning_rate": 3.5015974440894575e-05, "loss": 0.4068, "loss_nan_ranks": 0, "loss_rank_avg": 0.1774221956729889, "step": 275, "valid_targets_mean": 2685.8, "valid_targets_min": 448 }, { "epoch": 0.448, "grad_norm": 0.5003694452381771, "learning_rate": 3.565495207667732e-05, "loss": 0.3946, "loss_nan_ranks": 0, "loss_rank_avg": 0.20578652620315552, "step": 280, "valid_targets_mean": 5089.4, "valid_targets_min": 1146 }, { "epoch": 0.456, "grad_norm": 0.5430128250712549, "learning_rate": 3.629392971246007e-05, "loss": 0.4262, "loss_nan_ranks": 0, "loss_rank_avg": 0.19618654251098633, "step": 285, "valid_targets_mean": 2835.6, "valid_targets_min": 1099 }, { "epoch": 0.464, "grad_norm": 0.5889443716013671, "learning_rate": 3.6932907348242816e-05, "loss": 0.4326, "loss_nan_ranks": 0, "loss_rank_avg": 0.26432836055755615, "step": 290, "valid_targets_mean": 4703.8, "valid_targets_min": 1265 }, { "epoch": 0.472, "grad_norm": 0.5008891357690028, "learning_rate": 3.757188498402556e-05, "loss": 0.4477, "loss_nan_ranks": 0, "loss_rank_avg": 0.28413641452789307, "step": 295, "valid_targets_mean": 6156.9, "valid_targets_min": 1117 }, { "epoch": 0.48, "grad_norm": 0.45277012879942824, "learning_rate": 3.821086261980831e-05, "loss": 0.4005, "loss_nan_ranks": 0, "loss_rank_avg": 0.2018493264913559, "step": 300, "valid_targets_mean": 6251.2, "valid_targets_min": 699 }, { "epoch": 0.488, "grad_norm": 0.5498771344672786, "learning_rate": 3.884984025559106e-05, "loss": 0.4067, "loss_nan_ranks": 0, "loss_rank_avg": 0.20270295441150665, "step": 305, "valid_targets_mean": 3713.6, "valid_targets_min": 426 }, { "epoch": 0.496, "grad_norm": 0.4849980047118573, "learning_rate": 3.94888178913738e-05, "loss": 0.4172, "loss_nan_ranks": 0, "loss_rank_avg": 0.19320783019065857, "step": 310, "valid_targets_mean": 3616.2, "valid_targets_min": 1182 }, { "epoch": 0.504, "grad_norm": 0.46547683956214736, "learning_rate": 3.9999987518434296e-05, "loss": 0.4113, "loss_nan_ranks": 0, "loss_rank_avg": 0.16570322215557098, "step": 315, "valid_targets_mean": 4545.1, "valid_targets_min": 894 }, { "epoch": 0.512, "grad_norm": 0.6433873513411175, "learning_rate": 3.999955066527015e-05, "loss": 0.4134, "loss_nan_ranks": 0, "loss_rank_avg": 0.2329372763633728, "step": 320, "valid_targets_mean": 3225.2, "valid_targets_min": 1171 }, { "epoch": 0.52, "grad_norm": 0.48867055877170756, "learning_rate": 3.999848974939926e-05, "loss": 0.3981, "loss_nan_ranks": 0, "loss_rank_avg": 0.11635329574346542, "step": 325, "valid_targets_mean": 3431.9, "valid_targets_min": 713 }, { "epoch": 0.528, "grad_norm": 1.4894122572179855, "learning_rate": 3.999680480392626e-05, "loss": 0.4087, "loss_nan_ranks": 0, "loss_rank_avg": 0.18833759427070618, "step": 330, "valid_targets_mean": 3035.5, "valid_targets_min": 732 }, { "epoch": 0.536, "grad_norm": 0.4193239467986382, "learning_rate": 3.999449588142792e-05, "loss": 0.4141, "loss_nan_ranks": 0, "loss_rank_avg": 0.20873858034610748, "step": 335, "valid_targets_mean": 5798.9, "valid_targets_min": 2283 }, { "epoch": 0.544, "grad_norm": 0.4322322156645337, "learning_rate": 3.9991563053951476e-05, "loss": 0.3762, "loss_nan_ranks": 0, "loss_rank_avg": 0.15418782830238342, "step": 340, "valid_targets_mean": 4867.1, "valid_targets_min": 913 }, { "epoch": 0.552, "grad_norm": 0.42579176413360825, "learning_rate": 3.99880064130124e-05, "loss": 0.3795, "loss_nan_ranks": 0, "loss_rank_avg": 0.18619948625564575, "step": 345, "valid_targets_mean": 5330.1, "valid_targets_min": 676 }, { "epoch": 0.56, "grad_norm": 0.5161509309473619, "learning_rate": 3.9983826069591535e-05, "loss": 0.4151, "loss_nan_ranks": 0, "loss_rank_avg": 0.1781724989414215, "step": 350, "valid_targets_mean": 3472.8, "valid_targets_min": 997 }, { "epoch": 0.568, "grad_norm": 0.6041811404254598, "learning_rate": 3.997902215413163e-05, "loss": 0.3963, "loss_nan_ranks": 0, "loss_rank_avg": 0.30378806591033936, "step": 355, "valid_targets_mean": 3981.2, "valid_targets_min": 912 }, { "epoch": 0.576, "grad_norm": 0.6498872075998322, "learning_rate": 3.997359481653327e-05, "loss": 0.407, "loss_nan_ranks": 0, "loss_rank_avg": 0.20173442363739014, "step": 360, "valid_targets_mean": 2788.2, "valid_targets_min": 878 }, { "epoch": 0.584, "grad_norm": 0.5061612683016131, "learning_rate": 3.996754422615023e-05, "loss": 0.3623, "loss_nan_ranks": 0, "loss_rank_avg": 0.22666072845458984, "step": 365, "valid_targets_mean": 4118.0, "valid_targets_min": 1373 }, { "epoch": 0.592, "grad_norm": 0.5361445261038215, "learning_rate": 3.996087057178411e-05, "loss": 0.4507, "loss_nan_ranks": 0, "loss_rank_avg": 0.19703161716461182, "step": 370, "valid_targets_mean": 3619.8, "valid_targets_min": 582 }, { "epoch": 0.6, "grad_norm": 0.672527479458296, "learning_rate": 3.995357406167856e-05, "loss": 0.4195, "loss_nan_ranks": 0, "loss_rank_avg": 0.2972131371498108, "step": 375, "valid_targets_mean": 3516.8, "valid_targets_min": 874 }, { "epoch": 0.608, "grad_norm": 0.44549432799665417, "learning_rate": 3.994565492351267e-05, "loss": 0.4056, "loss_nan_ranks": 0, "loss_rank_avg": 0.17287719249725342, "step": 380, "valid_targets_mean": 5475.5, "valid_targets_min": 1842 }, { "epoch": 0.616, "grad_norm": 0.6778440222156983, "learning_rate": 3.993711340439394e-05, "loss": 0.4242, "loss_nan_ranks": 0, "loss_rank_avg": 0.21647445857524872, "step": 385, "valid_targets_mean": 3971.5, "valid_targets_min": 1585 }, { "epoch": 0.624, "grad_norm": 0.4459566145405042, "learning_rate": 3.9927949770850535e-05, "loss": 0.3925, "loss_nan_ranks": 0, "loss_rank_avg": 0.1151498481631279, "step": 390, "valid_targets_mean": 2951.2, "valid_targets_min": 1088 }, { "epoch": 0.632, "grad_norm": 0.48331678470692757, "learning_rate": 3.991816430882297e-05, "loss": 0.3602, "loss_nan_ranks": 0, "loss_rank_avg": 0.2142452448606491, "step": 395, "valid_targets_mean": 4534.9, "valid_targets_min": 711 }, { "epoch": 0.64, "grad_norm": 0.5127689050837395, "learning_rate": 3.9907757323655206e-05, "loss": 0.4035, "loss_nan_ranks": 0, "loss_rank_avg": 0.23935633897781372, "step": 400, "valid_targets_mean": 4701.5, "valid_targets_min": 598 }, { "epoch": 0.648, "grad_norm": 0.4843857269179256, "learning_rate": 3.98967291400851e-05, "loss": 0.3784, "loss_nan_ranks": 0, "loss_rank_avg": 0.25847354531288147, "step": 405, "valid_targets_mean": 5699.5, "valid_targets_min": 879 }, { "epoch": 0.656, "grad_norm": 0.5005998339687705, "learning_rate": 3.98850801022343e-05, "loss": 0.3906, "loss_nan_ranks": 0, "loss_rank_avg": 0.1377241611480713, "step": 410, "valid_targets_mean": 2504.0, "valid_targets_min": 740 }, { "epoch": 0.664, "grad_norm": 0.4717110410055623, "learning_rate": 3.987281057359746e-05, "loss": 0.4076, "loss_nan_ranks": 0, "loss_rank_avg": 0.20272189378738403, "step": 415, "valid_targets_mean": 5388.0, "valid_targets_min": 1322 }, { "epoch": 0.672, "grad_norm": 0.5915926837830854, "learning_rate": 3.985992093703096e-05, "loss": 0.4205, "loss_nan_ranks": 0, "loss_rank_avg": 0.16994090378284454, "step": 420, "valid_targets_mean": 2121.0, "valid_targets_min": 784 }, { "epoch": 0.68, "grad_norm": 0.43255266985931967, "learning_rate": 3.98464115947409e-05, "loss": 0.3954, "loss_nan_ranks": 0, "loss_rank_avg": 0.1824256181716919, "step": 425, "valid_targets_mean": 5472.2, "valid_targets_min": 1140 }, { "epoch": 0.688, "grad_norm": 0.49054534785016624, "learning_rate": 3.9832282968270595e-05, "loss": 0.4021, "loss_nan_ranks": 0, "loss_rank_avg": 0.1873103380203247, "step": 430, "valid_targets_mean": 3608.4, "valid_targets_min": 735 }, { "epoch": 0.696, "grad_norm": 0.5313053332029032, "learning_rate": 3.9817535498487385e-05, "loss": 0.4255, "loss_nan_ranks": 0, "loss_rank_avg": 0.2174983024597168, "step": 435, "valid_targets_mean": 3903.1, "valid_targets_min": 773 }, { "epoch": 0.704, "grad_norm": 0.5660976015817243, "learning_rate": 3.980216964556892e-05, "loss": 0.4006, "loss_nan_ranks": 0, "loss_rank_avg": 0.2702789306640625, "step": 440, "valid_targets_mean": 5340.2, "valid_targets_min": 2464 }, { "epoch": 0.712, "grad_norm": 0.3583060517799168, "learning_rate": 3.978618588898873e-05, "loss": 0.3687, "loss_nan_ranks": 0, "loss_rank_avg": 0.17784491181373596, "step": 445, "valid_targets_mean": 5914.0, "valid_targets_min": 1108 }, { "epoch": 0.72, "grad_norm": 0.5085712449981009, "learning_rate": 3.976958472750137e-05, "loss": 0.415, "loss_nan_ranks": 0, "loss_rank_avg": 0.17204846441745758, "step": 450, "valid_targets_mean": 2789.6, "valid_targets_min": 1260 }, { "epoch": 0.728, "grad_norm": 0.5391324690732857, "learning_rate": 3.9752366679126754e-05, "loss": 0.4117, "loss_nan_ranks": 0, "loss_rank_avg": 0.23437952995300293, "step": 455, "valid_targets_mean": 3562.9, "valid_targets_min": 952 }, { "epoch": 0.736, "grad_norm": 0.5567986016900565, "learning_rate": 3.973453228113405e-05, "loss": 0.4096, "loss_nan_ranks": 0, "loss_rank_avg": 0.22452281415462494, "step": 460, "valid_targets_mean": 3349.6, "valid_targets_min": 623 }, { "epoch": 0.744, "grad_norm": 0.5058714202986386, "learning_rate": 3.971608209002489e-05, "loss": 0.4383, "loss_nan_ranks": 0, "loss_rank_avg": 0.26416462659835815, "step": 465, "valid_targets_mean": 4220.1, "valid_targets_min": 1122 }, { "epoch": 0.752, "grad_norm": 0.5026494486558096, "learning_rate": 3.969701668151603e-05, "loss": 0.3986, "loss_nan_ranks": 0, "loss_rank_avg": 0.16783612966537476, "step": 470, "valid_targets_mean": 3548.6, "valid_targets_min": 787 }, { "epoch": 0.76, "grad_norm": 0.5620692640215499, "learning_rate": 3.9677336650521336e-05, "loss": 0.3936, "loss_nan_ranks": 0, "loss_rank_avg": 0.23793232440948486, "step": 475, "valid_targets_mean": 4302.1, "valid_targets_min": 857 }, { "epoch": 0.768, "grad_norm": 0.5022492717127841, "learning_rate": 3.9657042611133294e-05, "loss": 0.4374, "loss_nan_ranks": 0, "loss_rank_avg": 0.27741798758506775, "step": 480, "valid_targets_mean": 5250.4, "valid_targets_min": 1286 }, { "epoch": 0.776, "grad_norm": 0.46269173482413795, "learning_rate": 3.963613519660379e-05, "loss": 0.4168, "loss_nan_ranks": 0, "loss_rank_avg": 0.2027072012424469, "step": 485, "valid_targets_mean": 5004.6, "valid_targets_min": 613 }, { "epoch": 0.784, "grad_norm": 0.5741596915736422, "learning_rate": 3.961461505932435e-05, "loss": 0.4089, "loss_nan_ranks": 0, "loss_rank_avg": 0.20953507721424103, "step": 490, "valid_targets_mean": 4452.9, "valid_targets_min": 872 }, { "epoch": 0.792, "grad_norm": 0.4907391959582943, "learning_rate": 3.959248287080583e-05, "loss": 0.4368, "loss_nan_ranks": 0, "loss_rank_avg": 0.16955000162124634, "step": 495, "valid_targets_mean": 3555.2, "valid_targets_min": 371 }, { "epoch": 0.8, "grad_norm": 0.6031193676144202, "learning_rate": 3.9569739321657416e-05, "loss": 0.3877, "loss_nan_ranks": 0, "loss_rank_avg": 0.1926141083240509, "step": 500, "valid_targets_mean": 2296.8, "valid_targets_min": 895 }, { "epoch": 0.808, "grad_norm": 0.5937397695685496, "learning_rate": 3.9546385121565095e-05, "loss": 0.407, "loss_nan_ranks": 0, "loss_rank_avg": 0.2059236317873001, "step": 505, "valid_targets_mean": 2328.5, "valid_targets_min": 875 }, { "epoch": 0.816, "grad_norm": 0.44462295687513187, "learning_rate": 3.952242099926951e-05, "loss": 0.39, "loss_nan_ranks": 0, "loss_rank_avg": 0.15669041872024536, "step": 510, "valid_targets_mean": 4059.2, "valid_targets_min": 896 }, { "epoch": 0.824, "grad_norm": 0.4975667663636538, "learning_rate": 3.9497847702543196e-05, "loss": 0.4132, "loss_nan_ranks": 0, "loss_rank_avg": 0.2388497292995453, "step": 515, "valid_targets_mean": 4820.4, "valid_targets_min": 797 }, { "epoch": 0.832, "grad_norm": 0.5711830488685984, "learning_rate": 3.94726659981673e-05, "loss": 0.4272, "loss_nan_ranks": 0, "loss_rank_avg": 0.1668613851070404, "step": 520, "valid_targets_mean": 2210.6, "valid_targets_min": 1146 }, { "epoch": 0.84, "grad_norm": 0.5110974517075352, "learning_rate": 3.94468766719076e-05, "loss": 0.3883, "loss_nan_ranks": 0, "loss_rank_avg": 0.15008708834648132, "step": 525, "valid_targets_mean": 2796.0, "valid_targets_min": 706 }, { "epoch": 0.848, "grad_norm": 0.6082548647418597, "learning_rate": 3.942048052849001e-05, "loss": 0.4015, "loss_nan_ranks": 0, "loss_rank_avg": 0.20496749877929688, "step": 530, "valid_targets_mean": 3376.1, "valid_targets_min": 727 }, { "epoch": 0.856, "grad_norm": 0.5881521119749641, "learning_rate": 3.939347839157548e-05, "loss": 0.4042, "loss_nan_ranks": 0, "loss_rank_avg": 0.1840178519487381, "step": 535, "valid_targets_mean": 3227.4, "valid_targets_min": 523 }, { "epoch": 0.864, "grad_norm": 0.40438864628986093, "learning_rate": 3.9365871103734264e-05, "loss": 0.3748, "loss_nan_ranks": 0, "loss_rank_avg": 0.14281845092773438, "step": 540, "valid_targets_mean": 4520.6, "valid_targets_min": 1131 }, { "epoch": 0.872, "grad_norm": 0.43619871599488097, "learning_rate": 3.933765952641965e-05, "loss": 0.4081, "loss_nan_ranks": 0, "loss_rank_avg": 0.11543935537338257, "step": 545, "valid_targets_mean": 2299.2, "valid_targets_min": 847 }, { "epoch": 0.88, "grad_norm": 0.6284680735434611, "learning_rate": 3.930884453994109e-05, "loss": 0.3934, "loss_nan_ranks": 0, "loss_rank_avg": 0.2154695689678192, "step": 550, "valid_targets_mean": 2253.1, "valid_targets_min": 1014 }, { "epoch": 0.888, "grad_norm": 0.534153789789492, "learning_rate": 3.9279427043436706e-05, "loss": 0.4357, "loss_nan_ranks": 0, "loss_rank_avg": 0.16836689412593842, "step": 555, "valid_targets_mean": 2522.4, "valid_targets_min": 866 }, { "epoch": 0.896, "grad_norm": 0.42996929538088907, "learning_rate": 3.924940795484525e-05, "loss": 0.3961, "loss_nan_ranks": 0, "loss_rank_avg": 0.1445537656545639, "step": 560, "valid_targets_mean": 3909.5, "valid_targets_min": 779 }, { "epoch": 0.904, "grad_norm": 0.5487891975426903, "learning_rate": 3.9218788210877436e-05, "loss": 0.4047, "loss_nan_ranks": 0, "loss_rank_avg": 0.17581608891487122, "step": 565, "valid_targets_mean": 2792.2, "valid_targets_min": 780 }, { "epoch": 0.912, "grad_norm": 0.8309295578756681, "learning_rate": 3.918756876698676e-05, "loss": 0.4498, "loss_nan_ranks": 0, "loss_rank_avg": 0.24004212021827698, "step": 570, "valid_targets_mean": 2094.1, "valid_targets_min": 800 }, { "epoch": 0.92, "grad_norm": 0.5731658303042831, "learning_rate": 3.9155750597339634e-05, "loss": 0.4248, "loss_nan_ranks": 0, "loss_rank_avg": 0.23413121700286865, "step": 575, "valid_targets_mean": 2952.8, "valid_targets_min": 740 }, { "epoch": 0.928, "grad_norm": 0.6857186460842621, "learning_rate": 3.912333469478502e-05, "loss": 0.4148, "loss_nan_ranks": 0, "loss_rank_avg": 0.24854609370231628, "step": 580, "valid_targets_mean": 3259.5, "valid_targets_min": 1202 }, { "epoch": 0.936, "grad_norm": 0.46414179194452654, "learning_rate": 3.909032207082344e-05, "loss": 0.3897, "loss_nan_ranks": 0, "loss_rank_avg": 0.19742190837860107, "step": 585, "valid_targets_mean": 4621.4, "valid_targets_min": 1137 }, { "epoch": 0.944, "grad_norm": 0.5432959658650993, "learning_rate": 3.90567137555754e-05, "loss": 0.3952, "loss_nan_ranks": 0, "loss_rank_avg": 0.18072067201137543, "step": 590, "valid_targets_mean": 2764.4, "valid_targets_min": 610 }, { "epoch": 0.952, "grad_norm": 0.513616096435003, "learning_rate": 3.9022510797749286e-05, "loss": 0.4508, "loss_nan_ranks": 0, "loss_rank_avg": 0.28625935316085815, "step": 595, "valid_targets_mean": 4943.6, "valid_targets_min": 1006 }, { "epoch": 0.96, "grad_norm": 0.4135402289886326, "learning_rate": 3.898771426460859e-05, "loss": 0.3864, "loss_nan_ranks": 0, "loss_rank_avg": 0.1969718635082245, "step": 600, "valid_targets_mean": 4979.9, "valid_targets_min": 971 }, { "epoch": 0.968, "grad_norm": 0.42359538658922197, "learning_rate": 3.8952325241938635e-05, "loss": 0.4183, "loss_nan_ranks": 0, "loss_rank_avg": 0.1678941547870636, "step": 605, "valid_targets_mean": 4025.1, "valid_targets_min": 1413 }, { "epoch": 0.976, "grad_norm": 0.437069204077118, "learning_rate": 3.8916344834012695e-05, "loss": 0.3807, "loss_nan_ranks": 0, "loss_rank_avg": 0.18256065249443054, "step": 610, "valid_targets_mean": 4092.8, "valid_targets_min": 722 }, { "epoch": 0.984, "grad_norm": 0.3981270298976202, "learning_rate": 3.887977416355754e-05, "loss": 0.3837, "loss_nan_ranks": 0, "loss_rank_avg": 0.213514506816864, "step": 615, "valid_targets_mean": 5298.8, "valid_targets_min": 1094 }, { "epoch": 0.992, "grad_norm": 0.39948471020655046, "learning_rate": 3.884261437171838e-05, "loss": 0.3919, "loss_nan_ranks": 0, "loss_rank_avg": 0.18268711864948273, "step": 620, "valid_targets_mean": 5262.4, "valid_targets_min": 771 }, { "epoch": 1.0, "grad_norm": 0.4218730892895318, "learning_rate": 3.8804866618023284e-05, "loss": 0.3663, "loss_nan_ranks": 0, "loss_rank_avg": 0.2073502242565155, "step": 625, "valid_targets_mean": 6279.8, "valid_targets_min": 665 }, { "epoch": 1.008, "grad_norm": 0.4006188688474502, "learning_rate": 3.876653208034698e-05, "loss": 0.375, "loss_nan_ranks": 0, "loss_rank_avg": 0.15046511590480804, "step": 630, "valid_targets_mean": 4694.0, "valid_targets_min": 572 }, { "epoch": 1.016, "grad_norm": 0.5023669209376572, "learning_rate": 3.8727611954874114e-05, "loss": 0.4108, "loss_nan_ranks": 0, "loss_rank_avg": 0.19494599103927612, "step": 635, "valid_targets_mean": 4312.2, "valid_targets_min": 1605 }, { "epoch": 1.024, "grad_norm": 0.5415179452211821, "learning_rate": 3.8688107456061904e-05, "loss": 0.3649, "loss_nan_ranks": 0, "loss_rank_avg": 0.16564016044139862, "step": 640, "valid_targets_mean": 5437.8, "valid_targets_min": 950 }, { "epoch": 1.032, "grad_norm": 0.5250999668731369, "learning_rate": 3.864801981660227e-05, "loss": 0.3787, "loss_nan_ranks": 0, "loss_rank_avg": 0.16773557662963867, "step": 645, "valid_targets_mean": 2591.0, "valid_targets_min": 896 }, { "epoch": 1.04, "grad_norm": 0.4792499299558008, "learning_rate": 3.860735028738337e-05, "loss": 0.3879, "loss_nan_ranks": 0, "loss_rank_avg": 0.11868780851364136, "step": 650, "valid_targets_mean": 2843.8, "valid_targets_min": 667 }, { "epoch": 1.048, "grad_norm": 0.5931956749749214, "learning_rate": 3.856610013745051e-05, "loss": 0.3869, "loss_nan_ranks": 0, "loss_rank_avg": 0.17905378341674805, "step": 655, "valid_targets_mean": 2138.0, "valid_targets_min": 811 }, { "epoch": 1.056, "grad_norm": 0.5070089472539886, "learning_rate": 3.852427065396665e-05, "loss": 0.3597, "loss_nan_ranks": 0, "loss_rank_avg": 0.21618938446044922, "step": 660, "valid_targets_mean": 3818.6, "valid_targets_min": 874 }, { "epoch": 1.064, "grad_norm": 0.5277258256022489, "learning_rate": 3.848186314217213e-05, "loss": 0.3832, "loss_nan_ranks": 0, "loss_rank_avg": 0.20905262231826782, "step": 665, "valid_targets_mean": 5543.8, "valid_targets_min": 1842 }, { "epoch": 1.072, "grad_norm": 0.45992756895656156, "learning_rate": 3.843887892534402e-05, "loss": 0.3628, "loss_nan_ranks": 0, "loss_rank_avg": 0.14996416866779327, "step": 670, "valid_targets_mean": 3032.8, "valid_targets_min": 865 }, { "epoch": 1.08, "grad_norm": 0.43582299518949036, "learning_rate": 3.8395319344754776e-05, "loss": 0.3695, "loss_nan_ranks": 0, "loss_rank_avg": 0.14619383215904236, "step": 675, "valid_targets_mean": 4816.9, "valid_targets_min": 678 }, { "epoch": 1.088, "grad_norm": 0.47992188401849384, "learning_rate": 3.8351185759630435e-05, "loss": 0.3989, "loss_nan_ranks": 0, "loss_rank_avg": 0.17877763509750366, "step": 680, "valid_targets_mean": 4263.5, "valid_targets_min": 586 }, { "epoch": 1.096, "grad_norm": 0.5853943377609927, "learning_rate": 3.830647954710816e-05, "loss": 0.3652, "loss_nan_ranks": 0, "loss_rank_avg": 0.09830217808485031, "step": 685, "valid_targets_mean": 1025.2, "valid_targets_min": 527 }, { "epoch": 1.104, "grad_norm": 0.5453115260739528, "learning_rate": 3.826120210219331e-05, "loss": 0.4072, "loss_nan_ranks": 0, "loss_rank_avg": 0.19349700212478638, "step": 690, "valid_targets_mean": 3093.0, "valid_targets_min": 754 }, { "epoch": 1.112, "grad_norm": 0.37877838490425225, "learning_rate": 3.8215354837715836e-05, "loss": 0.3834, "loss_nan_ranks": 0, "loss_rank_avg": 0.12431143969297409, "step": 695, "valid_targets_mean": 5127.8, "valid_targets_min": 707 }, { "epoch": 1.12, "grad_norm": 0.6610820231714544, "learning_rate": 3.816893918428631e-05, "loss": 0.3786, "loss_nan_ranks": 0, "loss_rank_avg": 0.18000784516334534, "step": 700, "valid_targets_mean": 2055.1, "valid_targets_min": 688 }, { "epoch": 1.1280000000000001, "grad_norm": 0.5300125572424695, "learning_rate": 3.8121956590251153e-05, "loss": 0.4069, "loss_nan_ranks": 0, "loss_rank_avg": 0.1879684329032898, "step": 705, "valid_targets_mean": 3468.1, "valid_targets_min": 786 }, { "epoch": 1.1360000000000001, "grad_norm": 0.44683055917689773, "learning_rate": 3.8074408521647576e-05, "loss": 0.3836, "loss_nan_ranks": 0, "loss_rank_avg": 0.20903831720352173, "step": 710, "valid_targets_mean": 5525.9, "valid_targets_min": 422 }, { "epoch": 1.144, "grad_norm": 0.5102572350380875, "learning_rate": 3.802629646215771e-05, "loss": 0.3792, "loss_nan_ranks": 0, "loss_rank_avg": 0.17337819933891296, "step": 715, "valid_targets_mean": 4684.0, "valid_targets_min": 1435 }, { "epoch": 1.152, "grad_norm": 0.5765523983200641, "learning_rate": 3.79776219130624e-05, "loss": 0.3577, "loss_nan_ranks": 0, "loss_rank_avg": 0.1805480420589447, "step": 720, "valid_targets_mean": 2605.1, "valid_targets_min": 1231 }, { "epoch": 1.16, "grad_norm": 0.4897830077820699, "learning_rate": 3.792838639319431e-05, "loss": 0.3629, "loss_nan_ranks": 0, "loss_rank_avg": 0.19262930750846863, "step": 725, "valid_targets_mean": 3851.5, "valid_targets_min": 1736 }, { "epoch": 1.168, "grad_norm": 0.4322207849895438, "learning_rate": 3.787859143889054e-05, "loss": 0.3539, "loss_nan_ranks": 0, "loss_rank_avg": 0.23677626252174377, "step": 730, "valid_targets_mean": 6349.4, "valid_targets_min": 1935 }, { "epoch": 1.176, "grad_norm": 0.5128262395764999, "learning_rate": 3.782823860394469e-05, "loss": 0.3568, "loss_nan_ranks": 0, "loss_rank_avg": 0.20344534516334534, "step": 735, "valid_targets_mean": 3306.5, "valid_targets_min": 1146 }, { "epoch": 1.184, "grad_norm": 0.574030810014404, "learning_rate": 3.777732945955841e-05, "loss": 0.39, "loss_nan_ranks": 0, "loss_rank_avg": 0.2069251835346222, "step": 740, "valid_targets_mean": 3683.5, "valid_targets_min": 823 }, { "epoch": 1.192, "grad_norm": 0.5501142652379903, "learning_rate": 3.772586559429229e-05, "loss": 0.359, "loss_nan_ranks": 0, "loss_rank_avg": 0.26781395077705383, "step": 745, "valid_targets_mean": 4816.9, "valid_targets_min": 1250 }, { "epoch": 1.2, "grad_norm": 0.4653456781818237, "learning_rate": 3.767384861401636e-05, "loss": 0.3919, "loss_nan_ranks": 0, "loss_rank_avg": 0.1659979671239853, "step": 750, "valid_targets_mean": 4349.2, "valid_targets_min": 847 }, { "epoch": 1.208, "grad_norm": 0.38274296561169047, "learning_rate": 3.762128014185998e-05, "loss": 0.3675, "loss_nan_ranks": 0, "loss_rank_avg": 0.2027568817138672, "step": 755, "valid_targets_mean": 7722.0, "valid_targets_min": 1045 }, { "epoch": 1.216, "grad_norm": 0.9407563287769353, "learning_rate": 3.7568161818161135e-05, "loss": 0.3896, "loss_nan_ranks": 0, "loss_rank_avg": 0.1349801868200302, "step": 760, "valid_targets_mean": 2931.2, "valid_targets_min": 690 }, { "epoch": 1.224, "grad_norm": 0.5295635911819686, "learning_rate": 3.751449530041532e-05, "loss": 0.3771, "loss_nan_ranks": 0, "loss_rank_avg": 0.17271582782268524, "step": 765, "valid_targets_mean": 3066.4, "valid_targets_min": 422 }, { "epoch": 1.232, "grad_norm": 0.543101523869541, "learning_rate": 3.7460282263223764e-05, "loss": 0.3767, "loss_nan_ranks": 0, "loss_rank_avg": 0.17287389934062958, "step": 770, "valid_targets_mean": 2902.2, "valid_targets_min": 1035 }, { "epoch": 1.24, "grad_norm": 0.5204988467550381, "learning_rate": 3.740552439824122e-05, "loss": 0.3991, "loss_nan_ranks": 0, "loss_rank_avg": 0.19387352466583252, "step": 775, "valid_targets_mean": 3014.8, "valid_targets_min": 789 }, { "epoch": 1.248, "grad_norm": 0.5793118660838545, "learning_rate": 3.735022341412314e-05, "loss": 0.352, "loss_nan_ranks": 0, "loss_rank_avg": 0.1868707835674286, "step": 780, "valid_targets_mean": 2748.2, "valid_targets_min": 1103 }, { "epoch": 1.256, "grad_norm": 0.49312789125401785, "learning_rate": 3.7294381036472386e-05, "loss": 0.3778, "loss_nan_ranks": 0, "loss_rank_avg": 0.14733919501304626, "step": 785, "valid_targets_mean": 3287.6, "valid_targets_min": 984 }, { "epoch": 1.264, "grad_norm": 0.41240222215312733, "learning_rate": 3.723799900778538e-05, "loss": 0.3789, "loss_nan_ranks": 0, "loss_rank_avg": 0.126112163066864, "step": 790, "valid_targets_mean": 3915.2, "valid_targets_min": 767 }, { "epoch": 1.272, "grad_norm": 0.4390287545765749, "learning_rate": 3.7181079087397705e-05, "loss": 0.3514, "loss_nan_ranks": 0, "loss_rank_avg": 0.15731275081634521, "step": 795, "valid_targets_mean": 4579.5, "valid_targets_min": 1110 }, { "epoch": 1.28, "grad_norm": 0.5857426000892533, "learning_rate": 3.712362305142926e-05, "loss": 0.382, "loss_nan_ranks": 0, "loss_rank_avg": 0.2865251302719116, "step": 800, "valid_targets_mean": 4268.1, "valid_targets_min": 903 }, { "epoch": 1.288, "grad_norm": 0.5522244993785961, "learning_rate": 3.706563269272878e-05, "loss": 0.4019, "loss_nan_ranks": 0, "loss_rank_avg": 0.1548284888267517, "step": 805, "valid_targets_mean": 5253.2, "valid_targets_min": 958 }, { "epoch": 1.296, "grad_norm": 0.49707542029017, "learning_rate": 3.700710982081794e-05, "loss": 0.3604, "loss_nan_ranks": 0, "loss_rank_avg": 0.1068187803030014, "step": 810, "valid_targets_mean": 2315.8, "valid_targets_min": 1005 }, { "epoch": 1.304, "grad_norm": 0.4597650989348005, "learning_rate": 3.694805626183486e-05, "loss": 0.3419, "loss_nan_ranks": 0, "loss_rank_avg": 0.1940535604953766, "step": 815, "valid_targets_mean": 6227.8, "valid_targets_min": 1092 }, { "epoch": 1.312, "grad_norm": 0.4807446238755427, "learning_rate": 3.688847385847711e-05, "loss": 0.3648, "loss_nan_ranks": 0, "loss_rank_avg": 0.2306816130876541, "step": 820, "valid_targets_mean": 4811.5, "valid_targets_min": 888 }, { "epoch": 1.32, "grad_norm": 0.37954247976442135, "learning_rate": 3.682836446994428e-05, "loss": 0.355, "loss_nan_ranks": 0, "loss_rank_avg": 0.11209504306316376, "step": 825, "valid_targets_mean": 4343.1, "valid_targets_min": 554 }, { "epoch": 1.328, "grad_norm": 0.6968928370006656, "learning_rate": 3.676772997187989e-05, "loss": 0.4238, "loss_nan_ranks": 0, "loss_rank_avg": 0.1730729341506958, "step": 830, "valid_targets_mean": 2439.0, "valid_targets_min": 598 }, { "epoch": 1.336, "grad_norm": 0.5644540602579898, "learning_rate": 3.670657225631289e-05, "loss": 0.3816, "loss_nan_ranks": 0, "loss_rank_avg": 0.2261945605278015, "step": 835, "valid_targets_mean": 3144.8, "valid_targets_min": 682 }, { "epoch": 1.3439999999999999, "grad_norm": 0.4518440551193136, "learning_rate": 3.6644893231598635e-05, "loss": 0.3949, "loss_nan_ranks": 0, "loss_rank_avg": 0.13814306259155273, "step": 840, "valid_targets_mean": 4653.5, "valid_targets_min": 595 }, { "epoch": 1.3519999999999999, "grad_norm": 0.5278282431174571, "learning_rate": 3.658269482235932e-05, "loss": 0.3715, "loss_nan_ranks": 0, "loss_rank_avg": 0.25394973158836365, "step": 845, "valid_targets_mean": 3977.8, "valid_targets_min": 1029 }, { "epoch": 1.3599999999999999, "grad_norm": 0.4752219437628507, "learning_rate": 3.651997896942394e-05, "loss": 0.3455, "loss_nan_ranks": 0, "loss_rank_avg": 0.12458296865224838, "step": 850, "valid_targets_mean": 3616.1, "valid_targets_min": 596 }, { "epoch": 1.3679999999999999, "grad_norm": 0.5099738101411846, "learning_rate": 3.645674762976769e-05, "loss": 0.3951, "loss_nan_ranks": 0, "loss_rank_avg": 0.22577358782291412, "step": 855, "valid_targets_mean": 4207.6, "valid_targets_min": 1149 }, { "epoch": 1.376, "grad_norm": 0.38904671121083256, "learning_rate": 3.639300277645096e-05, "loss": 0.358, "loss_nan_ranks": 0, "loss_rank_avg": 0.13607583940029144, "step": 860, "valid_targets_mean": 4714.9, "valid_targets_min": 1457 }, { "epoch": 1.384, "grad_norm": 0.3729523812901352, "learning_rate": 3.6328746398557715e-05, "loss": 0.3478, "loss_nan_ranks": 0, "loss_rank_avg": 0.1809740662574768, "step": 865, "valid_targets_mean": 5782.2, "valid_targets_min": 965 }, { "epoch": 1.392, "grad_norm": 1.3346441971185985, "learning_rate": 3.6263980501133466e-05, "loss": 0.3478, "loss_nan_ranks": 0, "loss_rank_avg": 0.11153702437877655, "step": 870, "valid_targets_mean": 2711.6, "valid_targets_min": 656 }, { "epoch": 1.4, "grad_norm": 0.4932114438780455, "learning_rate": 3.619870710512268e-05, "loss": 0.3879, "loss_nan_ranks": 0, "loss_rank_avg": 0.16475415229797363, "step": 875, "valid_targets_mean": 3808.2, "valid_targets_min": 573 }, { "epoch": 1.408, "grad_norm": 0.42202752832274765, "learning_rate": 3.6132928247305713e-05, "loss": 0.3583, "loss_nan_ranks": 0, "loss_rank_avg": 0.1160656213760376, "step": 880, "valid_targets_mean": 2949.6, "valid_targets_min": 959 }, { "epoch": 1.416, "grad_norm": 0.45638707283657404, "learning_rate": 3.60666459802353e-05, "loss": 0.4137, "loss_nan_ranks": 0, "loss_rank_avg": 0.1908377707004547, "step": 885, "valid_targets_mean": 4822.8, "valid_targets_min": 1414 }, { "epoch": 1.424, "grad_norm": 0.49655080228821374, "learning_rate": 3.599986237217245e-05, "loss": 0.3884, "loss_nan_ranks": 0, "loss_rank_avg": 0.14393456280231476, "step": 890, "valid_targets_mean": 3312.5, "valid_targets_min": 1288 }, { "epoch": 1.432, "grad_norm": 0.40994947703418805, "learning_rate": 3.593257950702194e-05, "loss": 0.3821, "loss_nan_ranks": 0, "loss_rank_avg": 0.19887655973434448, "step": 895, "valid_targets_mean": 5122.5, "valid_targets_min": 1689 }, { "epoch": 1.44, "grad_norm": 0.6100341519792659, "learning_rate": 3.586479948426728e-05, "loss": 0.4045, "loss_nan_ranks": 0, "loss_rank_avg": 0.29838448762893677, "step": 900, "valid_targets_mean": 3684.9, "valid_targets_min": 781 }, { "epoch": 1.448, "grad_norm": 0.40258620360925185, "learning_rate": 3.579652441890523e-05, "loss": 0.357, "loss_nan_ranks": 0, "loss_rank_avg": 0.15237580239772797, "step": 905, "valid_targets_mean": 4839.9, "valid_targets_min": 1123 }, { "epoch": 1.456, "grad_norm": 0.6002865400954618, "learning_rate": 3.572775644137974e-05, "loss": 0.3534, "loss_nan_ranks": 0, "loss_rank_avg": 0.14805619418621063, "step": 910, "valid_targets_mean": 1668.4, "valid_targets_min": 697 }, { "epoch": 1.464, "grad_norm": 0.34946528869800725, "learning_rate": 3.5658497697515534e-05, "loss": 0.3369, "loss_nan_ranks": 0, "loss_rank_avg": 0.1722603142261505, "step": 915, "valid_targets_mean": 6944.6, "valid_targets_min": 1284 }, { "epoch": 1.472, "grad_norm": 0.5550974904476521, "learning_rate": 3.558875034845113e-05, "loss": 0.3414, "loss_nan_ranks": 0, "loss_rank_avg": 0.1764376014471054, "step": 920, "valid_targets_mean": 5085.6, "valid_targets_min": 1302 }, { "epoch": 1.48, "grad_norm": 0.40211624001194296, "learning_rate": 3.551851657057139e-05, "loss": 0.3507, "loss_nan_ranks": 0, "loss_rank_avg": 0.18077786266803741, "step": 925, "valid_targets_mean": 6009.4, "valid_targets_min": 853 }, { "epoch": 1.488, "grad_norm": 0.42842599212123683, "learning_rate": 3.544779855543963e-05, "loss": 0.3435, "loss_nan_ranks": 0, "loss_rank_avg": 0.21971535682678223, "step": 930, "valid_targets_mean": 5389.8, "valid_targets_min": 1860 }, { "epoch": 1.496, "grad_norm": 0.5412809944985588, "learning_rate": 3.5376598509729226e-05, "loss": 0.3777, "loss_nan_ranks": 0, "loss_rank_avg": 0.20583224296569824, "step": 935, "valid_targets_mean": 3356.8, "valid_targets_min": 1087 }, { "epoch": 1.504, "grad_norm": 0.4090830832550061, "learning_rate": 3.5304918655154754e-05, "loss": 0.3964, "loss_nan_ranks": 0, "loss_rank_avg": 0.21631677448749542, "step": 940, "valid_targets_mean": 5357.0, "valid_targets_min": 1077 }, { "epoch": 1.512, "grad_norm": 0.6620976500410706, "learning_rate": 3.523276122840266e-05, "loss": 0.3548, "loss_nan_ranks": 0, "loss_rank_avg": 0.2258632481098175, "step": 945, "valid_targets_mean": 2185.2, "valid_targets_min": 671 }, { "epoch": 1.52, "grad_norm": 0.4976296422194412, "learning_rate": 3.516012848106149e-05, "loss": 0.3499, "loss_nan_ranks": 0, "loss_rank_avg": 0.18777544796466827, "step": 950, "valid_targets_mean": 3989.5, "valid_targets_min": 1180 }, { "epoch": 1.528, "grad_norm": 0.4230732174600183, "learning_rate": 3.5087022679551614e-05, "loss": 0.3575, "loss_nan_ranks": 0, "loss_rank_avg": 0.19989222288131714, "step": 955, "valid_targets_mean": 4162.1, "valid_targets_min": 1657 }, { "epoch": 1.536, "grad_norm": 0.5275885905547862, "learning_rate": 3.5013446105054486e-05, "loss": 0.356, "loss_nan_ranks": 0, "loss_rank_avg": 0.17071036994457245, "step": 960, "valid_targets_mean": 2735.5, "valid_targets_min": 907 }, { "epoch": 1.544, "grad_norm": 0.46078220045065427, "learning_rate": 3.493940105344152e-05, "loss": 0.3706, "loss_nan_ranks": 0, "loss_rank_avg": 0.1477717161178589, "step": 965, "valid_targets_mean": 2956.9, "valid_targets_min": 947 }, { "epoch": 1.552, "grad_norm": 0.3995299747785965, "learning_rate": 3.4864889835202366e-05, "loss": 0.3534, "loss_nan_ranks": 0, "loss_rank_avg": 0.18769648671150208, "step": 970, "valid_targets_mean": 5725.6, "valid_targets_min": 1999 }, { "epoch": 1.56, "grad_norm": 0.64944906147947, "learning_rate": 3.4789914775372905e-05, "loss": 0.3862, "loss_nan_ranks": 0, "loss_rank_avg": 0.24089542031288147, "step": 975, "valid_targets_mean": 2843.6, "valid_targets_min": 869 }, { "epoch": 1.568, "grad_norm": 0.5425019910583312, "learning_rate": 3.471447821346264e-05, "loss": 0.3922, "loss_nan_ranks": 0, "loss_rank_avg": 0.15392085909843445, "step": 980, "valid_targets_mean": 2680.6, "valid_targets_min": 908 }, { "epoch": 1.576, "grad_norm": 0.5368209449228799, "learning_rate": 3.463858250338168e-05, "loss": 0.396, "loss_nan_ranks": 0, "loss_rank_avg": 0.1622292697429657, "step": 985, "valid_targets_mean": 2758.0, "valid_targets_min": 869 }, { "epoch": 1.584, "grad_norm": 0.43598303845384095, "learning_rate": 3.4562230013367374e-05, "loss": 0.4045, "loss_nan_ranks": 0, "loss_rank_avg": 0.21002206206321716, "step": 990, "valid_targets_mean": 4658.1, "valid_targets_min": 1457 }, { "epoch": 1.592, "grad_norm": 0.3904901071899063, "learning_rate": 3.448542312591032e-05, "loss": 0.37, "loss_nan_ranks": 0, "loss_rank_avg": 0.20996427536010742, "step": 995, "valid_targets_mean": 6088.1, "valid_targets_min": 688 }, { "epoch": 1.6, "grad_norm": 0.41487996792002385, "learning_rate": 3.440816423768007e-05, "loss": 0.3465, "loss_nan_ranks": 0, "loss_rank_avg": 0.2232140749692917, "step": 1000, "valid_targets_mean": 6249.6, "valid_targets_min": 790 }, { "epoch": 1.608, "grad_norm": 0.41215706864500073, "learning_rate": 3.433045575945031e-05, "loss": 0.3747, "loss_nan_ranks": 0, "loss_rank_avg": 0.1780746430158615, "step": 1005, "valid_targets_mean": 6710.1, "valid_targets_min": 720 }, { "epoch": 1.616, "grad_norm": 0.5427836394723989, "learning_rate": 3.42523001160237e-05, "loss": 0.4069, "loss_nan_ranks": 0, "loss_rank_avg": 0.22656163573265076, "step": 1010, "valid_targets_mean": 3230.6, "valid_targets_min": 1009 }, { "epoch": 1.624, "grad_norm": 0.4790601799537503, "learning_rate": 3.417369974615615e-05, "loss": 0.3731, "loss_nan_ranks": 0, "loss_rank_avg": 0.1649462878704071, "step": 1015, "valid_targets_mean": 4221.9, "valid_targets_min": 1007 }, { "epoch": 1.6320000000000001, "grad_norm": 0.4765491643369093, "learning_rate": 3.409465710248074e-05, "loss": 0.3515, "loss_nan_ranks": 0, "loss_rank_avg": 0.14106625318527222, "step": 1020, "valid_targets_mean": 2703.8, "valid_targets_min": 1020 }, { "epoch": 1.6400000000000001, "grad_norm": 0.49781135493555667, "learning_rate": 3.401517465143119e-05, "loss": 0.3895, "loss_nan_ranks": 0, "loss_rank_avg": 0.21954402327537537, "step": 1025, "valid_targets_mean": 4009.2, "valid_targets_min": 1460 }, { "epoch": 1.6480000000000001, "grad_norm": 0.45112165406620686, "learning_rate": 3.393525487316489e-05, "loss": 0.3614, "loss_nan_ranks": 0, "loss_rank_avg": 0.18274998664855957, "step": 1030, "valid_targets_mean": 4840.9, "valid_targets_min": 1708 }, { "epoch": 1.6560000000000001, "grad_norm": 0.4693212042872953, "learning_rate": 3.385490026148554e-05, "loss": 0.4153, "loss_nan_ranks": 0, "loss_rank_avg": 0.1388929784297943, "step": 1035, "valid_targets_mean": 3562.4, "valid_targets_min": 625 }, { "epoch": 1.6640000000000001, "grad_norm": 0.37397480223788876, "learning_rate": 3.377411332376529e-05, "loss": 0.3642, "loss_nan_ranks": 0, "loss_rank_avg": 0.2300184965133667, "step": 1040, "valid_targets_mean": 7929.9, "valid_targets_min": 994 }, { "epoch": 1.6720000000000002, "grad_norm": 0.3841145063100303, "learning_rate": 3.369289658086651e-05, "loss": 0.3544, "loss_nan_ranks": 0, "loss_rank_avg": 0.13292424380779266, "step": 1045, "valid_targets_mean": 3923.2, "valid_targets_min": 659 }, { "epoch": 1.6800000000000002, "grad_norm": 0.37819794098936316, "learning_rate": 3.3611252567063184e-05, "loss": 0.36, "loss_nan_ranks": 0, "loss_rank_avg": 0.19257867336273193, "step": 1050, "valid_targets_mean": 7234.4, "valid_targets_min": 1669 }, { "epoch": 1.688, "grad_norm": 0.4307477074202791, "learning_rate": 3.352918382996174e-05, "loss": 0.354, "loss_nan_ranks": 0, "loss_rank_avg": 0.19784092903137207, "step": 1055, "valid_targets_mean": 4706.8, "valid_targets_min": 1256 }, { "epoch": 1.696, "grad_norm": 0.47999337411149967, "learning_rate": 3.344669293042163e-05, "loss": 0.3863, "loss_nan_ranks": 0, "loss_rank_avg": 0.19967739284038544, "step": 1060, "valid_targets_mean": 3586.5, "valid_targets_min": 1024 }, { "epoch": 1.704, "grad_norm": 0.4958715076592039, "learning_rate": 3.336378244247539e-05, "loss": 0.3851, "loss_nan_ranks": 0, "loss_rank_avg": 0.25138044357299805, "step": 1065, "valid_targets_mean": 4725.0, "valid_targets_min": 877 }, { "epoch": 1.712, "grad_norm": 0.44982531036880435, "learning_rate": 3.3280454953248326e-05, "loss": 0.3318, "loss_nan_ranks": 0, "loss_rank_avg": 0.18653461337089539, "step": 1070, "valid_targets_mean": 4260.8, "valid_targets_min": 1106 }, { "epoch": 1.72, "grad_norm": 0.4523598014038419, "learning_rate": 3.3196713062877765e-05, "loss": 0.3524, "loss_nan_ranks": 0, "loss_rank_avg": 0.14612746238708496, "step": 1075, "valid_targets_mean": 4109.9, "valid_targets_min": 896 }, { "epoch": 1.728, "grad_norm": 0.40758165227767357, "learning_rate": 3.311255938443196e-05, "loss": 0.3723, "loss_nan_ranks": 0, "loss_rank_avg": 0.13389909267425537, "step": 1080, "valid_targets_mean": 4376.0, "valid_targets_min": 950 }, { "epoch": 1.736, "grad_norm": 0.5491124893709626, "learning_rate": 3.3027996543828524e-05, "loss": 0.3695, "loss_nan_ranks": 0, "loss_rank_avg": 0.18468721210956573, "step": 1085, "valid_targets_mean": 2868.6, "valid_targets_min": 849 }, { "epoch": 1.744, "grad_norm": 0.3993174672092302, "learning_rate": 3.2943027179752494e-05, "loss": 0.3416, "loss_nan_ranks": 0, "loss_rank_avg": 0.20559825003147125, "step": 1090, "valid_targets_mean": 6660.8, "valid_targets_min": 760 }, { "epoch": 1.752, "grad_norm": 0.40380661411621366, "learning_rate": 3.285765394357401e-05, "loss": 0.3312, "loss_nan_ranks": 0, "loss_rank_avg": 0.15859174728393555, "step": 1095, "valid_targets_mean": 4548.8, "valid_targets_min": 829 }, { "epoch": 1.76, "grad_norm": 0.5382918290555533, "learning_rate": 3.277187949926556e-05, "loss": 0.3523, "loss_nan_ranks": 0, "loss_rank_avg": 0.21806976199150085, "step": 1100, "valid_targets_mean": 3312.4, "valid_targets_min": 1025 }, { "epoch": 1.768, "grad_norm": 0.4145669760422594, "learning_rate": 3.268570652331888e-05, "loss": 0.3984, "loss_nan_ranks": 0, "loss_rank_avg": 0.1447543501853943, "step": 1105, "valid_targets_mean": 3821.0, "valid_targets_min": 722 }, { "epoch": 1.776, "grad_norm": 0.6163574474026122, "learning_rate": 3.2599137704661405e-05, "loss": 0.3596, "loss_nan_ranks": 0, "loss_rank_avg": 0.21196293830871582, "step": 1110, "valid_targets_mean": 2316.6, "valid_targets_min": 586 }, { "epoch": 1.784, "grad_norm": 0.47460067439094217, "learning_rate": 3.251217574457239e-05, "loss": 0.3742, "loss_nan_ranks": 0, "loss_rank_avg": 0.2540559768676758, "step": 1115, "valid_targets_mean": 4955.0, "valid_targets_min": 1701 }, { "epoch": 1.792, "grad_norm": 0.4624184685618981, "learning_rate": 3.242482335659861e-05, "loss": 0.3834, "loss_nan_ranks": 0, "loss_rank_avg": 0.18782807886600494, "step": 1120, "valid_targets_mean": 4641.6, "valid_targets_min": 785 }, { "epoch": 1.8, "grad_norm": 0.49672506088763413, "learning_rate": 3.2337083266469687e-05, "loss": 0.3983, "loss_nan_ranks": 0, "loss_rank_avg": 0.24676811695098877, "step": 1125, "valid_targets_mean": 4641.5, "valid_targets_min": 803 }, { "epoch": 1.808, "grad_norm": 0.4252619396562025, "learning_rate": 3.224895821201304e-05, "loss": 0.3789, "loss_nan_ranks": 0, "loss_rank_avg": 0.2329331487417221, "step": 1130, "valid_targets_mean": 5197.2, "valid_targets_min": 760 }, { "epoch": 1.8159999999999998, "grad_norm": 0.4185531683126807, "learning_rate": 3.2160450943068446e-05, "loss": 0.3662, "loss_nan_ranks": 0, "loss_rank_avg": 0.16280978918075562, "step": 1135, "valid_targets_mean": 5254.2, "valid_targets_min": 2000 }, { "epoch": 1.8239999999999998, "grad_norm": 0.696452870548893, "learning_rate": 3.207156422140225e-05, "loss": 0.4045, "loss_nan_ranks": 0, "loss_rank_avg": 0.19130727648735046, "step": 1140, "valid_targets_mean": 1899.5, "valid_targets_min": 771 }, { "epoch": 1.8319999999999999, "grad_norm": 0.48431295913248046, "learning_rate": 3.198230082062115e-05, "loss": 0.3836, "loss_nan_ranks": 0, "loss_rank_avg": 0.2074839472770691, "step": 1145, "valid_targets_mean": 5449.5, "valid_targets_min": 852 }, { "epoch": 1.8399999999999999, "grad_norm": 0.4133281290019396, "learning_rate": 3.189266352608574e-05, "loss": 0.366, "loss_nan_ranks": 0, "loss_rank_avg": 0.1883474886417389, "step": 1150, "valid_targets_mean": 4908.2, "valid_targets_min": 1127 }, { "epoch": 1.8479999999999999, "grad_norm": 0.40434980197549936, "learning_rate": 3.180265513482345e-05, "loss": 0.3366, "loss_nan_ranks": 0, "loss_rank_avg": 0.15408740937709808, "step": 1155, "valid_targets_mean": 4307.4, "valid_targets_min": 892 }, { "epoch": 1.8559999999999999, "grad_norm": 0.44482757502979986, "learning_rate": 3.171227845544143e-05, "loss": 0.3676, "loss_nan_ranks": 0, "loss_rank_avg": 0.2276269793510437, "step": 1160, "valid_targets_mean": 4911.0, "valid_targets_min": 957 }, { "epoch": 1.8639999999999999, "grad_norm": 0.43600977689095316, "learning_rate": 3.162153630803877e-05, "loss": 0.3542, "loss_nan_ranks": 0, "loss_rank_avg": 0.15386472642421722, "step": 1165, "valid_targets_mean": 4215.6, "valid_targets_min": 1328 }, { "epoch": 1.8719999999999999, "grad_norm": 0.37429615582479875, "learning_rate": 3.153043152411861e-05, "loss": 0.3945, "loss_nan_ranks": 0, "loss_rank_avg": 0.1603434681892395, "step": 1170, "valid_targets_mean": 4956.5, "valid_targets_min": 1233 }, { "epoch": 1.88, "grad_norm": 0.42018805399081294, "learning_rate": 3.14389669464997e-05, "loss": 0.367, "loss_nan_ranks": 0, "loss_rank_avg": 0.13551610708236694, "step": 1175, "valid_targets_mean": 3497.2, "valid_targets_min": 699 }, { "epoch": 1.888, "grad_norm": 0.4485191669176861, "learning_rate": 3.134714542922777e-05, "loss": 0.3694, "loss_nan_ranks": 0, "loss_rank_avg": 0.1404634714126587, "step": 1180, "valid_targets_mean": 3324.0, "valid_targets_min": 857 }, { "epoch": 1.896, "grad_norm": 0.5320471292171671, "learning_rate": 3.1254969837486425e-05, "loss": 0.3528, "loss_nan_ranks": 0, "loss_rank_avg": 0.22733406722545624, "step": 1185, "valid_targets_mean": 3494.6, "valid_targets_min": 1049 }, { "epoch": 1.904, "grad_norm": 0.4429069138743051, "learning_rate": 3.116244304750774e-05, "loss": 0.3441, "loss_nan_ranks": 0, "loss_rank_avg": 0.21461255848407745, "step": 1190, "valid_targets_mean": 5988.0, "valid_targets_min": 1106 }, { "epoch": 1.912, "grad_norm": 0.4909918363267137, "learning_rate": 3.106956794648254e-05, "loss": 0.3888, "loss_nan_ranks": 0, "loss_rank_avg": 0.3207101821899414, "step": 1195, "valid_targets_mean": 5188.9, "valid_targets_min": 1740 }, { "epoch": 1.92, "grad_norm": 0.4380391893049689, "learning_rate": 3.097634743247026e-05, "loss": 0.3777, "loss_nan_ranks": 0, "loss_rank_avg": 0.20924848318099976, "step": 1200, "valid_targets_mean": 5444.6, "valid_targets_min": 859 }, { "epoch": 1.928, "grad_norm": 0.39395020883135107, "learning_rate": 3.08827844143086e-05, "loss": 0.3606, "loss_nan_ranks": 0, "loss_rank_avg": 0.1892511397600174, "step": 1205, "valid_targets_mean": 4884.9, "valid_targets_min": 1116 }, { "epoch": 1.936, "grad_norm": 0.4431767573115503, "learning_rate": 3.078888181152264e-05, "loss": 0.3794, "loss_nan_ranks": 0, "loss_rank_avg": 0.12274126708507538, "step": 1210, "valid_targets_mean": 2207.4, "valid_targets_min": 565 }, { "epoch": 1.944, "grad_norm": 0.4393189555489097, "learning_rate": 3.0694642554233855e-05, "loss": 0.3684, "loss_nan_ranks": 0, "loss_rank_avg": 0.1852644830942154, "step": 1215, "valid_targets_mean": 3580.2, "valid_targets_min": 847 }, { "epoch": 1.952, "grad_norm": 0.5175461857648035, "learning_rate": 3.0600069583068594e-05, "loss": 0.3874, "loss_nan_ranks": 0, "loss_rank_avg": 0.13556857407093048, "step": 1220, "valid_targets_mean": 2068.8, "valid_targets_min": 911 }, { "epoch": 1.96, "grad_norm": 0.4344897474377549, "learning_rate": 3.0505165849066394e-05, "loss": 0.3416, "loss_nan_ranks": 0, "loss_rank_avg": 0.16121706366539001, "step": 1225, "valid_targets_mean": 4083.2, "valid_targets_min": 1160 }, { "epoch": 1.968, "grad_norm": 0.3998726023920137, "learning_rate": 3.040993431358782e-05, "loss": 0.3748, "loss_nan_ranks": 0, "loss_rank_avg": 0.20173384249210358, "step": 1230, "valid_targets_mean": 5485.5, "valid_targets_min": 795 }, { "epoch": 1.976, "grad_norm": 0.43530746416898686, "learning_rate": 3.031437794822215e-05, "loss": 0.3352, "loss_nan_ranks": 0, "loss_rank_avg": 0.1987071931362152, "step": 1235, "valid_targets_mean": 4561.8, "valid_targets_min": 737 }, { "epoch": 1.984, "grad_norm": 0.45516788943319536, "learning_rate": 3.021849973469455e-05, "loss": 0.3869, "loss_nan_ranks": 0, "loss_rank_avg": 0.18358221650123596, "step": 1240, "valid_targets_mean": 3207.0, "valid_targets_min": 998 }, { "epoch": 1.992, "grad_norm": 0.4576579120788942, "learning_rate": 3.012230266477313e-05, "loss": 0.3758, "loss_nan_ranks": 0, "loss_rank_avg": 0.11642518639564514, "step": 1245, "valid_targets_mean": 2846.4, "valid_targets_min": 645 }, { "epoch": 2.0, "grad_norm": 0.41687844040354605, "learning_rate": 3.0025789740175502e-05, "loss": 0.3621, "loss_nan_ranks": 0, "loss_rank_avg": 0.22766928374767303, "step": 1250, "valid_targets_mean": 5502.0, "valid_targets_min": 804 }, { "epoch": 2.008, "grad_norm": 0.39445917384438633, "learning_rate": 2.9928963972475186e-05, "loss": 0.3294, "loss_nan_ranks": 0, "loss_rank_avg": 0.18963614106178284, "step": 1255, "valid_targets_mean": 6827.1, "valid_targets_min": 716 }, { "epoch": 2.016, "grad_norm": 0.47795453215109707, "learning_rate": 2.9831828383007585e-05, "loss": 0.3477, "loss_nan_ranks": 0, "loss_rank_avg": 0.16897451877593994, "step": 1260, "valid_targets_mean": 3771.0, "valid_targets_min": 1365 }, { "epoch": 2.024, "grad_norm": 0.40551985391746687, "learning_rate": 2.9734386002775754e-05, "loss": 0.3464, "loss_nan_ranks": 0, "loss_rank_avg": 0.11677554249763489, "step": 1265, "valid_targets_mean": 4102.6, "valid_targets_min": 621 }, { "epoch": 2.032, "grad_norm": 0.42377235398466956, "learning_rate": 2.963663987235577e-05, "loss": 0.3505, "loss_nan_ranks": 0, "loss_rank_avg": 0.1520041525363922, "step": 1270, "valid_targets_mean": 5001.9, "valid_targets_min": 721 }, { "epoch": 2.04, "grad_norm": 0.4806596558991801, "learning_rate": 2.95385930418019e-05, "loss": 0.3668, "loss_nan_ranks": 0, "loss_rank_avg": 0.18150800466537476, "step": 1275, "valid_targets_mean": 3979.8, "valid_targets_min": 734 }, { "epoch": 2.048, "grad_norm": 0.45923619311529357, "learning_rate": 2.9440248570551406e-05, "loss": 0.3577, "loss_nan_ranks": 0, "loss_rank_avg": 0.17516975104808807, "step": 1280, "valid_targets_mean": 4615.6, "valid_targets_min": 732 }, { "epoch": 2.056, "grad_norm": 0.43037695871723236, "learning_rate": 2.934160952732907e-05, "loss": 0.3065, "loss_nan_ranks": 0, "loss_rank_avg": 0.1621362715959549, "step": 1285, "valid_targets_mean": 4676.5, "valid_targets_min": 1619 }, { "epoch": 2.064, "grad_norm": 0.5278235222224164, "learning_rate": 2.9242678990051462e-05, "loss": 0.3165, "loss_nan_ranks": 0, "loss_rank_avg": 0.12950673699378967, "step": 1290, "valid_targets_mean": 3176.2, "valid_targets_min": 820 }, { "epoch": 2.072, "grad_norm": 0.591249312950412, "learning_rate": 2.9143460045730886e-05, "loss": 0.3165, "loss_nan_ranks": 0, "loss_rank_avg": 0.25125831365585327, "step": 1295, "valid_targets_mean": 4605.6, "valid_targets_min": 1051 }, { "epoch": 2.08, "grad_norm": 0.4633677094496606, "learning_rate": 2.9043955790379035e-05, "loss": 0.3412, "loss_nan_ranks": 0, "loss_rank_avg": 0.15374936163425446, "step": 1300, "valid_targets_mean": 3456.1, "valid_targets_min": 596 }, { "epoch": 2.088, "grad_norm": 0.6839077748868694, "learning_rate": 2.8944169328910427e-05, "loss": 0.3314, "loss_nan_ranks": 0, "loss_rank_avg": 0.21061047911643982, "step": 1305, "valid_targets_mean": 2743.4, "valid_targets_min": 736 }, { "epoch": 2.096, "grad_norm": 0.40010179262506984, "learning_rate": 2.884410377504547e-05, "loss": 0.3489, "loss_nan_ranks": 0, "loss_rank_avg": 0.20529218018054962, "step": 1310, "valid_targets_mean": 5945.4, "valid_targets_min": 1603 }, { "epoch": 2.104, "grad_norm": 0.46363378675791567, "learning_rate": 2.8743762251213333e-05, "loss": 0.3516, "loss_nan_ranks": 0, "loss_rank_avg": 0.13235880434513092, "step": 1315, "valid_targets_mean": 3557.6, "valid_targets_min": 909 }, { "epoch": 2.112, "grad_norm": 0.5226657341187503, "learning_rate": 2.8643147888454507e-05, "loss": 0.321, "loss_nan_ranks": 0, "loss_rank_avg": 0.17466039955615997, "step": 1320, "valid_targets_mean": 5333.5, "valid_targets_min": 948 }, { "epoch": 2.12, "grad_norm": 0.5688435709083185, "learning_rate": 2.854226382632312e-05, "loss": 0.3687, "loss_nan_ranks": 0, "loss_rank_avg": 0.19334635138511658, "step": 1325, "valid_targets_mean": 3006.2, "valid_targets_min": 607 }, { "epoch": 2.128, "grad_norm": 0.3938353360765779, "learning_rate": 2.844111321278893e-05, "loss": 0.3356, "loss_nan_ranks": 0, "loss_rank_avg": 0.18020674586296082, "step": 1330, "valid_targets_mean": 7703.2, "valid_targets_min": 2069 }, { "epoch": 2.136, "grad_norm": 0.6166572236379344, "learning_rate": 2.833969920413913e-05, "loss": 0.3654, "loss_nan_ranks": 0, "loss_rank_avg": 0.15063633024692535, "step": 1335, "valid_targets_mean": 2484.9, "valid_targets_min": 588 }, { "epoch": 2.144, "grad_norm": 0.4594292950880163, "learning_rate": 2.8238024964879857e-05, "loss": 0.3588, "loss_nan_ranks": 0, "loss_rank_avg": 0.1551552265882492, "step": 1340, "valid_targets_mean": 4537.5, "valid_targets_min": 1203 }, { "epoch": 2.152, "grad_norm": 0.4759453483875312, "learning_rate": 2.8136093667637438e-05, "loss": 0.3608, "loss_nan_ranks": 0, "loss_rank_avg": 0.19271422922611237, "step": 1345, "valid_targets_mean": 5253.5, "valid_targets_min": 634 }, { "epoch": 2.16, "grad_norm": 0.4274589956773415, "learning_rate": 2.8033908493059394e-05, "loss": 0.3447, "loss_nan_ranks": 0, "loss_rank_avg": 0.26053884625434875, "step": 1350, "valid_targets_mean": 7490.9, "valid_targets_min": 696 }, { "epoch": 2.168, "grad_norm": 0.48920586887692913, "learning_rate": 2.793147262971519e-05, "loss": 0.3384, "loss_nan_ranks": 0, "loss_rank_avg": 0.20303943753242493, "step": 1355, "valid_targets_mean": 3912.5, "valid_targets_min": 1093 }, { "epoch": 2.176, "grad_norm": 0.3805101077019346, "learning_rate": 2.7828789273996748e-05, "loss": 0.3513, "loss_nan_ranks": 0, "loss_rank_avg": 0.1607498824596405, "step": 1360, "valid_targets_mean": 7038.1, "valid_targets_min": 950 }, { "epoch": 2.184, "grad_norm": 0.4436216784104909, "learning_rate": 2.7725861630018703e-05, "loss": 0.3757, "loss_nan_ranks": 0, "loss_rank_avg": 0.24152888357639313, "step": 1365, "valid_targets_mean": 6961.2, "valid_targets_min": 2951 }, { "epoch": 2.192, "grad_norm": 0.49361804115715835, "learning_rate": 2.7622692909518423e-05, "loss": 0.3367, "loss_nan_ranks": 0, "loss_rank_avg": 0.1655840426683426, "step": 1370, "valid_targets_mean": 3099.6, "valid_targets_min": 776 }, { "epoch": 2.2, "grad_norm": 0.3908270372770131, "learning_rate": 2.7519286331755766e-05, "loss": 0.3292, "loss_nan_ranks": 0, "loss_rank_avg": 0.20059353113174438, "step": 1375, "valid_targets_mean": 6875.1, "valid_targets_min": 863 }, { "epoch": 2.208, "grad_norm": 0.3381814363001518, "learning_rate": 2.7415645123412672e-05, "loss": 0.3038, "loss_nan_ranks": 0, "loss_rank_avg": 0.14437264204025269, "step": 1380, "valid_targets_mean": 5681.9, "valid_targets_min": 1259 }, { "epoch": 2.216, "grad_norm": 0.4335970734003149, "learning_rate": 2.731177251849246e-05, "loss": 0.3905, "loss_nan_ranks": 0, "loss_rank_avg": 0.12784774601459503, "step": 1385, "valid_targets_mean": 3582.9, "valid_targets_min": 1328 }, { "epoch": 2.224, "grad_norm": 0.5013659443395456, "learning_rate": 2.7207671758218884e-05, "loss": 0.324, "loss_nan_ranks": 0, "loss_rank_avg": 0.20887424051761627, "step": 1390, "valid_targets_mean": 3934.0, "valid_targets_min": 1019 }, { "epoch": 2.232, "grad_norm": 0.5451971071443736, "learning_rate": 2.710334609093504e-05, "loss": 0.3229, "loss_nan_ranks": 0, "loss_rank_avg": 0.12654241919517517, "step": 1395, "valid_targets_mean": 2552.6, "valid_targets_min": 1080 }, { "epoch": 2.24, "grad_norm": 0.5275185534002302, "learning_rate": 2.699879877200198e-05, "loss": 0.3476, "loss_nan_ranks": 0, "loss_rank_avg": 0.17860426008701324, "step": 1400, "valid_targets_mean": 4925.5, "valid_targets_min": 1138 }, { "epoch": 2.248, "grad_norm": 0.4750860893446014, "learning_rate": 2.6894033063697143e-05, "loss": 0.3167, "loss_nan_ranks": 0, "loss_rank_avg": 0.16186845302581787, "step": 1405, "valid_targets_mean": 3735.0, "valid_targets_min": 484 }, { "epoch": 2.2560000000000002, "grad_norm": 0.5373568486267774, "learning_rate": 2.6789052235112554e-05, "loss": 0.3537, "loss_nan_ranks": 0, "loss_rank_avg": 0.22873114049434662, "step": 1410, "valid_targets_mean": 4564.0, "valid_targets_min": 632 }, { "epoch": 2.2640000000000002, "grad_norm": 0.5788908168658097, "learning_rate": 2.66838595620528e-05, "loss": 0.3401, "loss_nan_ranks": 0, "loss_rank_avg": 0.2733263075351715, "step": 1415, "valid_targets_mean": 4478.9, "valid_targets_min": 673 }, { "epoch": 2.2720000000000002, "grad_norm": 0.586889266057967, "learning_rate": 2.6578458326932842e-05, "loss": 0.3683, "loss_nan_ranks": 0, "loss_rank_avg": 0.17043578624725342, "step": 1420, "valid_targets_mean": 2696.2, "valid_targets_min": 811 }, { "epoch": 2.2800000000000002, "grad_norm": 0.493980927370087, "learning_rate": 2.6472851818675583e-05, "loss": 0.3391, "loss_nan_ranks": 0, "loss_rank_avg": 0.2201269567012787, "step": 1425, "valid_targets_mean": 4983.6, "valid_targets_min": 810 }, { "epoch": 2.288, "grad_norm": 0.3286261942090746, "learning_rate": 2.6367043332609223e-05, "loss": 0.3392, "loss_nan_ranks": 0, "loss_rank_avg": 0.13666635751724243, "step": 1430, "valid_targets_mean": 7580.5, "valid_targets_min": 1536 }, { "epoch": 2.296, "grad_norm": 0.4699948466545275, "learning_rate": 2.6261036170364448e-05, "loss": 0.3476, "loss_nan_ranks": 0, "loss_rank_avg": 0.1846490055322647, "step": 1435, "valid_targets_mean": 3870.8, "valid_targets_min": 660 }, { "epoch": 2.304, "grad_norm": 0.6157253523596955, "learning_rate": 2.6154833639771415e-05, "loss": 0.362, "loss_nan_ranks": 0, "loss_rank_avg": 0.22415241599082947, "step": 1440, "valid_targets_mean": 2897.1, "valid_targets_min": 1086 }, { "epoch": 2.312, "grad_norm": 0.45969123667163286, "learning_rate": 2.6048439054756492e-05, "loss": 0.3217, "loss_nan_ranks": 0, "loss_rank_avg": 0.17089970409870148, "step": 1445, "valid_targets_mean": 4007.8, "valid_targets_min": 905 }, { "epoch": 2.32, "grad_norm": 0.4423705287499431, "learning_rate": 2.594185573523892e-05, "loss": 0.3455, "loss_nan_ranks": 0, "loss_rank_avg": 0.1676362156867981, "step": 1450, "valid_targets_mean": 8078.1, "valid_targets_min": 969 }, { "epoch": 2.328, "grad_norm": 0.5054530871166943, "learning_rate": 2.583508700702716e-05, "loss": 0.3509, "loss_nan_ranks": 0, "loss_rank_avg": 0.13996833562850952, "step": 1455, "valid_targets_mean": 3282.5, "valid_targets_min": 864 }, { "epoch": 2.336, "grad_norm": 0.4468904131293525, "learning_rate": 2.572813620171513e-05, "loss": 0.3255, "loss_nan_ranks": 0, "loss_rank_avg": 0.1576099395751953, "step": 1460, "valid_targets_mean": 4571.8, "valid_targets_min": 1272 }, { "epoch": 2.344, "grad_norm": 0.4624486850943764, "learning_rate": 2.5621006656578267e-05, "loss": 0.3153, "loss_nan_ranks": 0, "loss_rank_avg": 0.15907934308052063, "step": 1465, "valid_targets_mean": 4421.0, "valid_targets_min": 982 }, { "epoch": 2.352, "grad_norm": 0.4307528801295405, "learning_rate": 2.5513701714469373e-05, "loss": 0.3714, "loss_nan_ranks": 0, "loss_rank_avg": 0.1317245364189148, "step": 1470, "valid_targets_mean": 3624.6, "valid_targets_min": 854 }, { "epoch": 2.36, "grad_norm": 0.5446119589065747, "learning_rate": 2.540622472371429e-05, "loss": 0.3409, "loss_nan_ranks": 0, "loss_rank_avg": 0.1581343561410904, "step": 1475, "valid_targets_mean": 3219.8, "valid_targets_min": 535 }, { "epoch": 2.368, "grad_norm": 0.7703262680045895, "learning_rate": 2.5298579038007478e-05, "loss": 0.351, "loss_nan_ranks": 0, "loss_rank_avg": 0.18610429763793945, "step": 1480, "valid_targets_mean": 3620.9, "valid_targets_min": 1339 }, { "epoch": 2.376, "grad_norm": 0.4079087647088371, "learning_rate": 2.519076801630727e-05, "loss": 0.3062, "loss_nan_ranks": 0, "loss_rank_avg": 0.14258792996406555, "step": 1485, "valid_targets_mean": 5352.1, "valid_targets_min": 1055 }, { "epoch": 2.384, "grad_norm": 0.514685400749292, "learning_rate": 2.508279502273117e-05, "loss": 0.335, "loss_nan_ranks": 0, "loss_rank_avg": 0.20326395332813263, "step": 1490, "valid_targets_mean": 4564.5, "valid_targets_min": 974 }, { "epoch": 2.392, "grad_norm": 0.5844991616700916, "learning_rate": 2.4974663426450798e-05, "loss": 0.3599, "loss_nan_ranks": 0, "loss_rank_avg": 0.2439168244600296, "step": 1495, "valid_targets_mean": 3669.2, "valid_targets_min": 1138 }, { "epoch": 2.4, "grad_norm": 0.4534268106459526, "learning_rate": 2.4866376601586798e-05, "loss": 0.3367, "loss_nan_ranks": 0, "loss_rank_avg": 0.14468619227409363, "step": 1500, "valid_targets_mean": 4236.1, "valid_targets_min": 337 }, { "epoch": 2.408, "grad_norm": 0.37149672658326494, "learning_rate": 2.475793792710352e-05, "loss": 0.3079, "loss_nan_ranks": 0, "loss_rank_avg": 0.15612857043743134, "step": 1505, "valid_targets_mean": 5706.2, "valid_targets_min": 1033 }, { "epoch": 2.416, "grad_norm": 0.431204790605149, "learning_rate": 2.4649350786703637e-05, "loss": 0.3472, "loss_nan_ranks": 0, "loss_rank_avg": 0.15120291709899902, "step": 1510, "valid_targets_mean": 3504.2, "valid_targets_min": 537 }, { "epoch": 2.424, "grad_norm": 0.38058066092756215, "learning_rate": 2.45406185687225e-05, "loss": 0.3334, "loss_nan_ranks": 0, "loss_rank_avg": 0.2249833345413208, "step": 1515, "valid_targets_mean": 7466.4, "valid_targets_min": 877 }, { "epoch": 2.432, "grad_norm": 0.4958529733126483, "learning_rate": 2.443174466602246e-05, "loss": 0.3362, "loss_nan_ranks": 0, "loss_rank_avg": 0.18912896513938904, "step": 1520, "valid_targets_mean": 3740.2, "valid_targets_min": 1103 }, { "epoch": 2.44, "grad_norm": 0.44552157761164163, "learning_rate": 2.4322732475886953e-05, "loss": 0.3425, "loss_nan_ranks": 0, "loss_rank_avg": 0.196616530418396, "step": 1525, "valid_targets_mean": 5088.4, "valid_targets_min": 1383 }, { "epoch": 2.448, "grad_norm": 0.5732608370374113, "learning_rate": 2.4213585399914528e-05, "loss": 0.3386, "loss_nan_ranks": 0, "loss_rank_avg": 0.261408269405365, "step": 1530, "valid_targets_mean": 5127.4, "valid_targets_min": 914 }, { "epoch": 2.456, "grad_norm": 0.4646397657685872, "learning_rate": 2.4104306843912687e-05, "loss": 0.3481, "loss_nan_ranks": 0, "loss_rank_avg": 0.19430388510227203, "step": 1535, "valid_targets_mean": 4647.1, "valid_targets_min": 841 }, { "epoch": 2.464, "grad_norm": 0.44219115003302095, "learning_rate": 2.3994900217791615e-05, "loss": 0.3248, "loss_nan_ranks": 0, "loss_rank_avg": 0.14268314838409424, "step": 1540, "valid_targets_mean": 3976.5, "valid_targets_min": 697 }, { "epoch": 2.472, "grad_norm": 0.4933434455561328, "learning_rate": 2.3885368935457762e-05, "loss": 0.3596, "loss_nan_ranks": 0, "loss_rank_avg": 0.16441547870635986, "step": 1545, "valid_targets_mean": 3575.2, "valid_targets_min": 823 }, { "epoch": 2.48, "grad_norm": 0.4326667267917598, "learning_rate": 2.3775716414707355e-05, "loss": 0.328, "loss_nan_ranks": 0, "loss_rank_avg": 0.1232375055551529, "step": 1550, "valid_targets_mean": 3318.1, "valid_targets_min": 1079 }, { "epoch": 2.488, "grad_norm": 0.429583180648258, "learning_rate": 2.36659460771197e-05, "loss": 0.3812, "loss_nan_ranks": 0, "loss_rank_avg": 0.07942848652601242, "step": 1555, "valid_targets_mean": 2053.8, "valid_targets_min": 653 }, { "epoch": 2.496, "grad_norm": 0.5473918418436651, "learning_rate": 2.3556061347950455e-05, "loss": 0.3418, "loss_nan_ranks": 0, "loss_rank_avg": 0.20061814785003662, "step": 1560, "valid_targets_mean": 3775.9, "valid_targets_min": 1127 }, { "epoch": 2.504, "grad_norm": 0.6024361371215083, "learning_rate": 2.3446065656024734e-05, "loss": 0.3522, "loss_nan_ranks": 0, "loss_rank_avg": 0.21523049473762512, "step": 1565, "valid_targets_mean": 2751.6, "valid_targets_min": 708 }, { "epoch": 2.512, "grad_norm": 0.5006491647087972, "learning_rate": 2.33359624336301e-05, "loss": 0.3123, "loss_nan_ranks": 0, "loss_rank_avg": 0.09982918202877045, "step": 1570, "valid_targets_mean": 1937.8, "valid_targets_min": 1018 }, { "epoch": 2.52, "grad_norm": 0.36371194764317616, "learning_rate": 2.3225755116409497e-05, "loss": 0.3093, "loss_nan_ranks": 0, "loss_rank_avg": 0.17913754284381866, "step": 1575, "valid_targets_mean": 7189.9, "valid_targets_min": 686 }, { "epoch": 2.528, "grad_norm": 0.5048673985779927, "learning_rate": 2.311544714325403e-05, "loss": 0.3441, "loss_nan_ranks": 0, "loss_rank_avg": 0.1666538417339325, "step": 1580, "valid_targets_mean": 3370.5, "valid_targets_min": 819 }, { "epoch": 2.536, "grad_norm": 0.5182580499328722, "learning_rate": 2.300504195619563e-05, "loss": 0.3615, "loss_nan_ranks": 0, "loss_rank_avg": 0.2155158817768097, "step": 1585, "valid_targets_mean": 4026.4, "valid_targets_min": 695 }, { "epoch": 2.544, "grad_norm": 0.468514133638622, "learning_rate": 2.2894543000299697e-05, "loss": 0.3323, "loss_nan_ranks": 0, "loss_rank_avg": 0.24576425552368164, "step": 1590, "valid_targets_mean": 5894.2, "valid_targets_min": 942 }, { "epoch": 2.552, "grad_norm": 0.4884711618253095, "learning_rate": 2.2783953723557572e-05, "loss": 0.342, "loss_nan_ranks": 0, "loss_rank_avg": 0.16744929552078247, "step": 1595, "valid_targets_mean": 3803.2, "valid_targets_min": 927 }, { "epoch": 2.56, "grad_norm": 0.5999751684945661, "learning_rate": 2.2673277576778946e-05, "loss": 0.3547, "loss_nan_ranks": 0, "loss_rank_avg": 0.21919560432434082, "step": 1600, "valid_targets_mean": 2878.8, "valid_targets_min": 994 }, { "epoch": 2.568, "grad_norm": 0.5423569314618256, "learning_rate": 2.2562518013484208e-05, "loss": 0.3769, "loss_nan_ranks": 0, "loss_rank_avg": 0.24968698620796204, "step": 1605, "valid_targets_mean": 3738.9, "valid_targets_min": 887 }, { "epoch": 2.576, "grad_norm": 0.5562360071258139, "learning_rate": 2.245167848979664e-05, "loss": 0.3489, "loss_nan_ranks": 0, "loss_rank_avg": 0.18698713183403015, "step": 1610, "valid_targets_mean": 2892.8, "valid_targets_min": 1438 }, { "epoch": 2.584, "grad_norm": 0.5326819387179283, "learning_rate": 2.23407624643346e-05, "loss": 0.3371, "loss_nan_ranks": 0, "loss_rank_avg": 0.16616696119308472, "step": 1615, "valid_targets_mean": 3816.0, "valid_targets_min": 906 }, { "epoch": 2.592, "grad_norm": 0.49126894683922456, "learning_rate": 2.2229773398103606e-05, "loss": 0.3507, "loss_nan_ranks": 0, "loss_rank_avg": 0.16989147663116455, "step": 1620, "valid_targets_mean": 3596.0, "valid_targets_min": 937 }, { "epoch": 2.6, "grad_norm": 0.41955570335468, "learning_rate": 2.2118714754388323e-05, "loss": 0.3514, "loss_nan_ranks": 0, "loss_rank_avg": 0.17922857403755188, "step": 1625, "valid_targets_mean": 5167.9, "valid_targets_min": 1261 }, { "epoch": 2.608, "grad_norm": 0.4466338468560662, "learning_rate": 2.200758999864449e-05, "loss": 0.3404, "loss_nan_ranks": 0, "loss_rank_avg": 0.23674200475215912, "step": 1630, "valid_targets_mean": 6806.6, "valid_targets_min": 1415 }, { "epoch": 2.616, "grad_norm": 0.42959952004699986, "learning_rate": 2.1896402598390818e-05, "loss": 0.3535, "loss_nan_ranks": 0, "loss_rank_avg": 0.18198445439338684, "step": 1635, "valid_targets_mean": 4071.1, "valid_targets_min": 948 }, { "epoch": 2.624, "grad_norm": 0.6276829545612954, "learning_rate": 2.178515602310074e-05, "loss": 0.3686, "loss_nan_ranks": 0, "loss_rank_avg": 0.3186021149158478, "step": 1640, "valid_targets_mean": 4211.4, "valid_targets_min": 1217 }, { "epoch": 2.632, "grad_norm": 0.4505510976461976, "learning_rate": 2.1673853744094193e-05, "loss": 0.3974, "loss_nan_ranks": 0, "loss_rank_avg": 0.29543089866638184, "step": 1645, "valid_targets_mean": 7278.0, "valid_targets_min": 1524 }, { "epoch": 2.64, "grad_norm": 0.37719949029802746, "learning_rate": 2.1562499234429283e-05, "loss": 0.3246, "loss_nan_ranks": 0, "loss_rank_avg": 0.15694212913513184, "step": 1650, "valid_targets_mean": 6930.1, "valid_targets_min": 1011 }, { "epoch": 2.648, "grad_norm": 0.46205859204161104, "learning_rate": 2.1451095968793908e-05, "loss": 0.339, "loss_nan_ranks": 0, "loss_rank_avg": 0.149054616689682, "step": 1655, "valid_targets_mean": 3161.5, "valid_targets_min": 965 }, { "epoch": 2.656, "grad_norm": 0.5109521277662005, "learning_rate": 2.1339647423397337e-05, "loss": 0.3685, "loss_nan_ranks": 0, "loss_rank_avg": 0.1951621025800705, "step": 1660, "valid_targets_mean": 4541.2, "valid_targets_min": 775 }, { "epoch": 2.664, "grad_norm": 0.5496498798895945, "learning_rate": 2.122815707586176e-05, "loss": 0.3509, "loss_nan_ranks": 0, "loss_rank_avg": 0.19943974912166595, "step": 1665, "valid_targets_mean": 3634.1, "valid_targets_min": 895 }, { "epoch": 2.672, "grad_norm": 0.5440227120286691, "learning_rate": 2.111662840511373e-05, "loss": 0.3516, "loss_nan_ranks": 0, "loss_rank_avg": 0.1294553130865097, "step": 1670, "valid_targets_mean": 2321.6, "valid_targets_min": 661 }, { "epoch": 2.68, "grad_norm": 0.49077600939023164, "learning_rate": 2.1005064891275638e-05, "loss": 0.3567, "loss_nan_ranks": 0, "loss_rank_avg": 0.14773976802825928, "step": 1675, "valid_targets_mean": 4320.6, "valid_targets_min": 886 }, { "epoch": 2.6879999999999997, "grad_norm": 0.391281755034438, "learning_rate": 2.0893470015557126e-05, "loss": 0.3396, "loss_nan_ranks": 0, "loss_rank_avg": 0.10046662390232086, "step": 1680, "valid_targets_mean": 5169.5, "valid_targets_min": 982 }, { "epoch": 2.6959999999999997, "grad_norm": 0.45801761659477147, "learning_rate": 2.078184726014643e-05, "loss": 0.3712, "loss_nan_ranks": 0, "loss_rank_avg": 0.1757431924343109, "step": 1685, "valid_targets_mean": 4211.1, "valid_targets_min": 1628 }, { "epoch": 2.7039999999999997, "grad_norm": 0.4549840031695572, "learning_rate": 2.0670200108101754e-05, "loss": 0.3328, "loss_nan_ranks": 0, "loss_rank_avg": 0.1375683844089508, "step": 1690, "valid_targets_mean": 3498.6, "valid_targets_min": 1023 }, { "epoch": 2.7119999999999997, "grad_norm": 0.4915990617682205, "learning_rate": 2.0558532043242557e-05, "loss": 0.3437, "loss_nan_ranks": 0, "loss_rank_avg": 0.0870099812746048, "step": 1695, "valid_targets_mean": 3573.2, "valid_targets_min": 574 }, { "epoch": 2.7199999999999998, "grad_norm": 0.5924797281596321, "learning_rate": 2.0446846550040863e-05, "loss": 0.3685, "loss_nan_ranks": 0, "loss_rank_avg": 0.22033245861530304, "step": 1700, "valid_targets_mean": 2877.0, "valid_targets_min": 854 }, { "epoch": 2.7279999999999998, "grad_norm": 0.43877107974753327, "learning_rate": 2.033514711351253e-05, "loss": 0.3527, "loss_nan_ranks": 0, "loss_rank_avg": 0.15773963928222656, "step": 1705, "valid_targets_mean": 4485.2, "valid_targets_min": 941 }, { "epoch": 2.7359999999999998, "grad_norm": 0.530664944625799, "learning_rate": 2.022343721910851e-05, "loss": 0.3421, "loss_nan_ranks": 0, "loss_rank_avg": 0.2027740180492401, "step": 1710, "valid_targets_mean": 4494.0, "valid_targets_min": 1226 }, { "epoch": 2.7439999999999998, "grad_norm": 0.6038027202052175, "learning_rate": 2.0111720352606054e-05, "loss": 0.3601, "loss_nan_ranks": 0, "loss_rank_avg": 0.19138199090957642, "step": 1715, "valid_targets_mean": 3350.9, "valid_targets_min": 1106 }, { "epoch": 2.752, "grad_norm": 0.48781174342181044, "learning_rate": 2e-05, "loss": 0.3113, "loss_nan_ranks": 0, "loss_rank_avg": 0.19703620672225952, "step": 1720, "valid_targets_mean": 5906.1, "valid_targets_min": 1657 }, { "epoch": 2.76, "grad_norm": 0.4902966583040599, "learning_rate": 1.988827964739395e-05, "loss": 0.3293, "loss_nan_ranks": 0, "loss_rank_avg": 0.12021969258785248, "step": 1725, "valid_targets_mean": 2399.0, "valid_targets_min": 897 }, { "epoch": 2.768, "grad_norm": 0.47146153893588105, "learning_rate": 1.9776562780891494e-05, "loss": 0.3446, "loss_nan_ranks": 0, "loss_rank_avg": 0.14058835804462433, "step": 1730, "valid_targets_mean": 2738.2, "valid_targets_min": 1074 }, { "epoch": 2.776, "grad_norm": 0.5388251301985688, "learning_rate": 1.966485288648747e-05, "loss": 0.3284, "loss_nan_ranks": 0, "loss_rank_avg": 0.16873209178447723, "step": 1735, "valid_targets_mean": 3595.6, "valid_targets_min": 844 }, { "epoch": 2.784, "grad_norm": 0.5085083642233609, "learning_rate": 1.9553153449959144e-05, "loss": 0.3548, "loss_nan_ranks": 0, "loss_rank_avg": 0.17369981110095978, "step": 1740, "valid_targets_mean": 4477.5, "valid_targets_min": 905 }, { "epoch": 2.792, "grad_norm": 0.49910923546702074, "learning_rate": 1.9441467956757453e-05, "loss": 0.3631, "loss_nan_ranks": 0, "loss_rank_avg": 0.12397250533103943, "step": 1745, "valid_targets_mean": 2318.6, "valid_targets_min": 868 }, { "epoch": 2.8, "grad_norm": 0.48862405925011343, "learning_rate": 1.9329799891898256e-05, "loss": 0.3353, "loss_nan_ranks": 0, "loss_rank_avg": 0.23336371779441833, "step": 1750, "valid_targets_mean": 4532.2, "valid_targets_min": 1037 }, { "epoch": 2.808, "grad_norm": 0.39497314431233704, "learning_rate": 1.9218152739853576e-05, "loss": 0.3471, "loss_nan_ranks": 0, "loss_rank_avg": 0.1946973204612732, "step": 1755, "valid_targets_mean": 5582.1, "valid_targets_min": 710 }, { "epoch": 2.816, "grad_norm": 0.5003296582395104, "learning_rate": 1.9106529984442884e-05, "loss": 0.3275, "loss_nan_ranks": 0, "loss_rank_avg": 0.2435501217842102, "step": 1760, "valid_targets_mean": 5183.2, "valid_targets_min": 1061 }, { "epoch": 2.824, "grad_norm": 0.52781859107085, "learning_rate": 1.8994935108724366e-05, "loss": 0.345, "loss_nan_ranks": 0, "loss_rank_avg": 0.24607789516448975, "step": 1765, "valid_targets_mean": 4977.9, "valid_targets_min": 668 }, { "epoch": 2.832, "grad_norm": 0.4870668187855425, "learning_rate": 1.8883371594886276e-05, "loss": 0.3378, "loss_nan_ranks": 0, "loss_rank_avg": 0.17322807013988495, "step": 1770, "valid_targets_mean": 4143.9, "valid_targets_min": 721 }, { "epoch": 2.84, "grad_norm": 0.5501480651217011, "learning_rate": 1.877184292413824e-05, "loss": 0.3385, "loss_nan_ranks": 0, "loss_rank_avg": 0.1610127091407776, "step": 1775, "valid_targets_mean": 2323.1, "valid_targets_min": 760 }, { "epoch": 2.848, "grad_norm": 0.5665295596550164, "learning_rate": 1.8660352576602663e-05, "loss": 0.344, "loss_nan_ranks": 0, "loss_rank_avg": 0.23653550446033478, "step": 1780, "valid_targets_mean": 4046.8, "valid_targets_min": 559 }, { "epoch": 2.856, "grad_norm": 0.47013239118348343, "learning_rate": 1.8548904031206102e-05, "loss": 0.3472, "loss_nan_ranks": 0, "loss_rank_avg": 0.20719118416309357, "step": 1785, "valid_targets_mean": 5973.1, "valid_targets_min": 824 }, { "epoch": 2.864, "grad_norm": 0.4773248045425617, "learning_rate": 1.843750076557072e-05, "loss": 0.3382, "loss_nan_ranks": 0, "loss_rank_avg": 0.18604245781898499, "step": 1790, "valid_targets_mean": 5011.6, "valid_targets_min": 1589 }, { "epoch": 2.872, "grad_norm": 0.7432332329422374, "learning_rate": 1.832614625590581e-05, "loss": 0.3387, "loss_nan_ranks": 0, "loss_rank_avg": 0.20564137399196625, "step": 1795, "valid_targets_mean": 2076.8, "valid_targets_min": 701 }, { "epoch": 2.88, "grad_norm": 0.5780630575919011, "learning_rate": 1.8214843976899264e-05, "loss": 0.3475, "loss_nan_ranks": 0, "loss_rank_avg": 0.168882817029953, "step": 1800, "valid_targets_mean": 2690.0, "valid_targets_min": 973 }, { "epoch": 2.888, "grad_norm": 0.4048713565752578, "learning_rate": 1.810359740160919e-05, "loss": 0.3294, "loss_nan_ranks": 0, "loss_rank_avg": 0.11568081378936768, "step": 1805, "valid_targets_mean": 4304.0, "valid_targets_min": 1228 }, { "epoch": 2.896, "grad_norm": 0.5039084001370419, "learning_rate": 1.7992410001355515e-05, "loss": 0.3471, "loss_nan_ranks": 0, "loss_rank_avg": 0.1639002561569214, "step": 1810, "valid_targets_mean": 3705.6, "valid_targets_min": 678 }, { "epoch": 2.904, "grad_norm": 0.8029006281339147, "learning_rate": 1.788128524561168e-05, "loss": 0.342, "loss_nan_ranks": 0, "loss_rank_avg": 0.15965096652507782, "step": 1815, "valid_targets_mean": 1893.2, "valid_targets_min": 908 }, { "epoch": 2.912, "grad_norm": 0.45687805144339416, "learning_rate": 1.7770226601896397e-05, "loss": 0.3296, "loss_nan_ranks": 0, "loss_rank_avg": 0.1370457112789154, "step": 1820, "valid_targets_mean": 3719.6, "valid_targets_min": 1436 }, { "epoch": 2.92, "grad_norm": 0.4584149777451423, "learning_rate": 1.7659237535665404e-05, "loss": 0.3318, "loss_nan_ranks": 0, "loss_rank_avg": 0.18669384717941284, "step": 1825, "valid_targets_mean": 4329.4, "valid_targets_min": 792 }, { "epoch": 2.928, "grad_norm": 0.3857893662873705, "learning_rate": 1.754832151020337e-05, "loss": 0.3181, "loss_nan_ranks": 0, "loss_rank_avg": 0.17508453130722046, "step": 1830, "valid_targets_mean": 6344.2, "valid_targets_min": 807 }, { "epoch": 2.936, "grad_norm": 0.5909250437931737, "learning_rate": 1.74374819865158e-05, "loss": 0.318, "loss_nan_ranks": 0, "loss_rank_avg": 0.1460743248462677, "step": 1835, "valid_targets_mean": 2246.0, "valid_targets_min": 1028 }, { "epoch": 2.944, "grad_norm": 0.6168729718884929, "learning_rate": 1.7326722423221057e-05, "loss": 0.3609, "loss_nan_ranks": 0, "loss_rank_avg": 0.16383513808250427, "step": 1840, "valid_targets_mean": 2735.9, "valid_targets_min": 1176 }, { "epoch": 2.952, "grad_norm": 0.35833838993310363, "learning_rate": 1.7216046276442438e-05, "loss": 0.3415, "loss_nan_ranks": 0, "loss_rank_avg": 0.1925455927848816, "step": 1845, "valid_targets_mean": 8259.6, "valid_targets_min": 1127 }, { "epoch": 2.96, "grad_norm": 0.5568115528689965, "learning_rate": 1.7105456999700306e-05, "loss": 0.3607, "loss_nan_ranks": 0, "loss_rank_avg": 0.18549929559230804, "step": 1850, "valid_targets_mean": 3148.5, "valid_targets_min": 829 }, { "epoch": 2.968, "grad_norm": 0.44855376408233205, "learning_rate": 1.6994958043804374e-05, "loss": 0.3437, "loss_nan_ranks": 0, "loss_rank_avg": 0.14252299070358276, "step": 1855, "valid_targets_mean": 3142.0, "valid_targets_min": 1217 }, { "epoch": 2.976, "grad_norm": 0.4728038820881367, "learning_rate": 1.6884552856745972e-05, "loss": 0.3444, "loss_nan_ranks": 0, "loss_rank_avg": 0.10633358359336853, "step": 1860, "valid_targets_mean": 1938.1, "valid_targets_min": 506 }, { "epoch": 2.984, "grad_norm": 0.6465001523766998, "learning_rate": 1.6774244883590503e-05, "loss": 0.3675, "loss_nan_ranks": 0, "loss_rank_avg": 0.1333983838558197, "step": 1865, "valid_targets_mean": 1810.0, "valid_targets_min": 721 }, { "epoch": 2.992, "grad_norm": 0.42614834103402127, "learning_rate": 1.6664037566369905e-05, "loss": 0.3441, "loss_nan_ranks": 0, "loss_rank_avg": 0.14628413319587708, "step": 1870, "valid_targets_mean": 5585.8, "valid_targets_min": 1323 }, { "epoch": 3.0, "grad_norm": 0.5627282905947615, "learning_rate": 1.6553934343975273e-05, "loss": 0.365, "loss_nan_ranks": 0, "loss_rank_avg": 0.25714242458343506, "step": 1875, "valid_targets_mean": 4970.8, "valid_targets_min": 1718 }, { "epoch": 3.008, "grad_norm": 0.5335261046503706, "learning_rate": 1.644393865204955e-05, "loss": 0.3588, "loss_nan_ranks": 0, "loss_rank_avg": 0.12273615598678589, "step": 1880, "valid_targets_mean": 3116.5, "valid_targets_min": 1219 }, { "epoch": 3.016, "grad_norm": 1.132349702215561, "learning_rate": 1.6334053922880304e-05, "loss": 0.3097, "loss_nan_ranks": 0, "loss_rank_avg": 0.12334040552377701, "step": 1885, "valid_targets_mean": 5509.8, "valid_targets_min": 1485 }, { "epoch": 3.024, "grad_norm": 0.44315501434081866, "learning_rate": 1.622428358529265e-05, "loss": 0.3306, "loss_nan_ranks": 0, "loss_rank_avg": 0.1480335295200348, "step": 1890, "valid_targets_mean": 3953.9, "valid_targets_min": 596 }, { "epoch": 3.032, "grad_norm": 0.588510096981289, "learning_rate": 1.611463106454224e-05, "loss": 0.3392, "loss_nan_ranks": 0, "loss_rank_avg": 0.23434148728847504, "step": 1895, "valid_targets_mean": 4087.4, "valid_targets_min": 1260 }, { "epoch": 3.04, "grad_norm": 0.42621894302985963, "learning_rate": 1.6005099782208392e-05, "loss": 0.3477, "loss_nan_ranks": 0, "loss_rank_avg": 0.18396449089050293, "step": 1900, "valid_targets_mean": 5456.5, "valid_targets_min": 2731 }, { "epoch": 3.048, "grad_norm": 0.41823653199244526, "learning_rate": 1.5895693156087317e-05, "loss": 0.3079, "loss_nan_ranks": 0, "loss_rank_avg": 0.19087563455104828, "step": 1905, "valid_targets_mean": 6455.0, "valid_targets_min": 1013 }, { "epoch": 3.056, "grad_norm": 0.4782619422592544, "learning_rate": 1.578641460008548e-05, "loss": 0.3172, "loss_nan_ranks": 0, "loss_rank_avg": 0.1430778205394745, "step": 1910, "valid_targets_mean": 3750.5, "valid_targets_min": 725 }, { "epoch": 3.064, "grad_norm": 0.4189615311261732, "learning_rate": 1.5677267524113054e-05, "loss": 0.3169, "loss_nan_ranks": 0, "loss_rank_avg": 0.15594616532325745, "step": 1915, "valid_targets_mean": 6085.0, "valid_targets_min": 1033 }, { "epoch": 3.072, "grad_norm": 0.4552341804156781, "learning_rate": 1.5568255333977547e-05, "loss": 0.2908, "loss_nan_ranks": 0, "loss_rank_avg": 0.1645529717206955, "step": 1920, "valid_targets_mean": 4930.0, "valid_targets_min": 1100 }, { "epoch": 3.08, "grad_norm": 0.4123722916613195, "learning_rate": 1.5459381431277506e-05, "loss": 0.326, "loss_nan_ranks": 0, "loss_rank_avg": 0.10328075289726257, "step": 1925, "valid_targets_mean": 4879.5, "valid_targets_min": 886 }, { "epoch": 3.088, "grad_norm": 0.49773690652553637, "learning_rate": 1.5350649213296373e-05, "loss": 0.3312, "loss_nan_ranks": 0, "loss_rank_avg": 0.1240905225276947, "step": 1930, "valid_targets_mean": 2668.1, "valid_targets_min": 936 }, { "epoch": 3.096, "grad_norm": 0.45278449933874565, "learning_rate": 1.5242062072896483e-05, "loss": 0.3371, "loss_nan_ranks": 0, "loss_rank_avg": 0.13505850732326508, "step": 1935, "valid_targets_mean": 3849.6, "valid_targets_min": 845 }, { "epoch": 3.104, "grad_norm": 0.3913986266762585, "learning_rate": 1.5133623398413209e-05, "loss": 0.3189, "loss_nan_ranks": 0, "loss_rank_avg": 0.1345825046300888, "step": 1940, "valid_targets_mean": 4995.1, "valid_targets_min": 1052 }, { "epoch": 3.112, "grad_norm": 0.6573102578659725, "learning_rate": 1.50253365735492e-05, "loss": 0.3329, "loss_nan_ranks": 0, "loss_rank_avg": 0.13540002703666687, "step": 1945, "valid_targets_mean": 1855.5, "valid_targets_min": 617 }, { "epoch": 3.12, "grad_norm": 0.5353571868182, "learning_rate": 1.4917204977268833e-05, "loss": 0.3487, "loss_nan_ranks": 0, "loss_rank_avg": 0.10793092101812363, "step": 1950, "valid_targets_mean": 2389.6, "valid_targets_min": 499 }, { "epoch": 3.128, "grad_norm": 0.45964730669624526, "learning_rate": 1.4809231983692733e-05, "loss": 0.3234, "loss_nan_ranks": 0, "loss_rank_avg": 0.18516826629638672, "step": 1955, "valid_targets_mean": 6327.5, "valid_targets_min": 1400 }, { "epoch": 3.136, "grad_norm": 0.5326551897217631, "learning_rate": 1.4701420961992533e-05, "loss": 0.309, "loss_nan_ranks": 0, "loss_rank_avg": 0.11213398724794388, "step": 1960, "valid_targets_mean": 2593.1, "valid_targets_min": 638 }, { "epoch": 3.144, "grad_norm": 0.5727183007723465, "learning_rate": 1.459377527628571e-05, "loss": 0.3177, "loss_nan_ranks": 0, "loss_rank_avg": 0.0960133746266365, "step": 1965, "valid_targets_mean": 1934.0, "valid_targets_min": 576 }, { "epoch": 3.152, "grad_norm": 0.5747836643926282, "learning_rate": 1.4486298285530634e-05, "loss": 0.3613, "loss_nan_ranks": 0, "loss_rank_avg": 0.2422075867652893, "step": 1970, "valid_targets_mean": 4018.4, "valid_targets_min": 722 }, { "epoch": 3.16, "grad_norm": 0.4200083023418681, "learning_rate": 1.4378993343421736e-05, "loss": 0.3153, "loss_nan_ranks": 0, "loss_rank_avg": 0.13149552047252655, "step": 1975, "valid_targets_mean": 4988.6, "valid_targets_min": 1289 }, { "epoch": 3.168, "grad_norm": 0.5127779132992235, "learning_rate": 1.4271863798284877e-05, "loss": 0.3095, "loss_nan_ranks": 0, "loss_rank_avg": 0.12630169093608856, "step": 1980, "valid_targets_mean": 2393.5, "valid_targets_min": 690 }, { "epoch": 3.176, "grad_norm": 0.5792388346676679, "learning_rate": 1.4164912992972846e-05, "loss": 0.3446, "loss_nan_ranks": 0, "loss_rank_avg": 0.16788384318351746, "step": 1985, "valid_targets_mean": 3161.4, "valid_targets_min": 829 }, { "epoch": 3.184, "grad_norm": 0.5717202482455881, "learning_rate": 1.4058144264761087e-05, "loss": 0.2997, "loss_nan_ranks": 0, "loss_rank_avg": 0.19649925827980042, "step": 1990, "valid_targets_mean": 4664.4, "valid_targets_min": 955 }, { "epoch": 3.192, "grad_norm": 0.6151627913778978, "learning_rate": 1.3951560945243517e-05, "loss": 0.3316, "loss_nan_ranks": 0, "loss_rank_avg": 0.16047929227352142, "step": 1995, "valid_targets_mean": 2772.8, "valid_targets_min": 793 }, { "epoch": 3.2, "grad_norm": 0.4242933168481296, "learning_rate": 1.3845166360228597e-05, "loss": 0.3652, "loss_nan_ranks": 0, "loss_rank_avg": 0.19517377018928528, "step": 2000, "valid_targets_mean": 6388.5, "valid_targets_min": 495 }, { "epoch": 3.208, "grad_norm": 0.5003469604574386, "learning_rate": 1.3738963829635559e-05, "loss": 0.3156, "loss_nan_ranks": 0, "loss_rank_avg": 0.16463890671730042, "step": 2005, "valid_targets_mean": 4910.6, "valid_targets_min": 833 }, { "epoch": 3.216, "grad_norm": 0.9508204216990079, "learning_rate": 1.3632956667390784e-05, "loss": 0.2938, "loss_nan_ranks": 0, "loss_rank_avg": 0.14386102557182312, "step": 2010, "valid_targets_mean": 3383.9, "valid_targets_min": 1221 }, { "epoch": 3.224, "grad_norm": 0.47315611427798243, "learning_rate": 1.3527148181324425e-05, "loss": 0.3215, "loss_nan_ranks": 0, "loss_rank_avg": 0.1463852822780609, "step": 2015, "valid_targets_mean": 4139.1, "valid_targets_min": 881 }, { "epoch": 3.232, "grad_norm": 0.4910524255424862, "learning_rate": 1.3421541673067168e-05, "loss": 0.3058, "loss_nan_ranks": 0, "loss_rank_avg": 0.19467991590499878, "step": 2020, "valid_targets_mean": 5570.2, "valid_targets_min": 1334 }, { "epoch": 3.24, "grad_norm": 0.42714613992182937, "learning_rate": 1.3316140437947207e-05, "loss": 0.2985, "loss_nan_ranks": 0, "loss_rank_avg": 0.12120620906352997, "step": 2025, "valid_targets_mean": 4442.6, "valid_targets_min": 704 }, { "epoch": 3.248, "grad_norm": 0.5255009932566614, "learning_rate": 1.321094776488745e-05, "loss": 0.3184, "loss_nan_ranks": 0, "loss_rank_avg": 0.1309448778629303, "step": 2030, "valid_targets_mean": 3026.9, "valid_targets_min": 999 }, { "epoch": 3.2560000000000002, "grad_norm": 0.530757098061631, "learning_rate": 1.3105966936302856e-05, "loss": 0.3272, "loss_nan_ranks": 0, "loss_rank_avg": 0.1335388720035553, "step": 2035, "valid_targets_mean": 3075.5, "valid_targets_min": 1200 }, { "epoch": 3.2640000000000002, "grad_norm": 0.4322808474453205, "learning_rate": 1.3001201227998023e-05, "loss": 0.3245, "loss_nan_ranks": 0, "loss_rank_avg": 0.17669960856437683, "step": 2040, "valid_targets_mean": 8023.5, "valid_targets_min": 574 }, { "epoch": 3.2720000000000002, "grad_norm": 0.45545029411121973, "learning_rate": 1.2896653909064964e-05, "loss": 0.3091, "loss_nan_ranks": 0, "loss_rank_avg": 0.12387904524803162, "step": 2045, "valid_targets_mean": 4796.9, "valid_targets_min": 944 }, { "epoch": 3.2800000000000002, "grad_norm": 0.4647985673919204, "learning_rate": 1.2792328241781124e-05, "loss": 0.3246, "loss_nan_ranks": 0, "loss_rank_avg": 0.13686639070510864, "step": 2050, "valid_targets_mean": 4213.1, "valid_targets_min": 430 }, { "epoch": 3.288, "grad_norm": 0.638531208310754, "learning_rate": 1.2688227481507546e-05, "loss": 0.3168, "loss_nan_ranks": 0, "loss_rank_avg": 0.12405982613563538, "step": 2055, "valid_targets_mean": 3198.2, "valid_targets_min": 834 }, { "epoch": 3.296, "grad_norm": 0.3881187620470557, "learning_rate": 1.258435487658733e-05, "loss": 0.3283, "loss_nan_ranks": 0, "loss_rank_avg": 0.14476531744003296, "step": 2060, "valid_targets_mean": 4399.9, "valid_targets_min": 971 }, { "epoch": 3.304, "grad_norm": 0.4425558771523959, "learning_rate": 1.2480713668244243e-05, "loss": 0.3007, "loss_nan_ranks": 0, "loss_rank_avg": 0.14966867864131927, "step": 2065, "valid_targets_mean": 5938.1, "valid_targets_min": 1014 }, { "epoch": 3.312, "grad_norm": 0.47898598677849, "learning_rate": 1.2377307090481586e-05, "loss": 0.3288, "loss_nan_ranks": 0, "loss_rank_avg": 0.13591575622558594, "step": 2070, "valid_targets_mean": 4385.5, "valid_targets_min": 721 }, { "epoch": 3.32, "grad_norm": 0.6460674905120861, "learning_rate": 1.2274138369981298e-05, "loss": 0.3089, "loss_nan_ranks": 0, "loss_rank_avg": 0.23174312710762024, "step": 2075, "valid_targets_mean": 3097.0, "valid_targets_min": 630 }, { "epoch": 3.328, "grad_norm": 0.42734464278344975, "learning_rate": 1.2171210726003256e-05, "loss": 0.3276, "loss_nan_ranks": 0, "loss_rank_avg": 0.16050082445144653, "step": 2080, "valid_targets_mean": 5419.9, "valid_targets_min": 867 }, { "epoch": 3.336, "grad_norm": 0.47895472971752917, "learning_rate": 1.2068527370284815e-05, "loss": 0.3176, "loss_nan_ranks": 0, "loss_rank_avg": 0.16464433073997498, "step": 2085, "valid_targets_mean": 3824.9, "valid_targets_min": 1169 }, { "epoch": 3.344, "grad_norm": 0.4240551447146959, "learning_rate": 1.1966091506940616e-05, "loss": 0.3274, "loss_nan_ranks": 0, "loss_rank_avg": 0.11462630331516266, "step": 2090, "valid_targets_mean": 4184.4, "valid_targets_min": 832 }, { "epoch": 3.352, "grad_norm": 0.7394655211812231, "learning_rate": 1.1863906332362569e-05, "loss": 0.3628, "loss_nan_ranks": 0, "loss_rank_avg": 0.1140076294541359, "step": 2095, "valid_targets_mean": 2578.2, "valid_targets_min": 786 }, { "epoch": 3.36, "grad_norm": 0.41922280281116975, "learning_rate": 1.176197503512015e-05, "loss": 0.3148, "loss_nan_ranks": 0, "loss_rank_avg": 0.159539595246315, "step": 2100, "valid_targets_mean": 5751.0, "valid_targets_min": 808 }, { "epoch": 3.368, "grad_norm": 0.5584401926293026, "learning_rate": 1.1660300795860877e-05, "loss": 0.3253, "loss_nan_ranks": 0, "loss_rank_avg": 0.1747303605079651, "step": 2105, "valid_targets_mean": 2999.6, "valid_targets_min": 834 }, { "epoch": 3.376, "grad_norm": 0.4166954589757251, "learning_rate": 1.1558886787211071e-05, "loss": 0.2865, "loss_nan_ranks": 0, "loss_rank_avg": 0.1294165849685669, "step": 2110, "valid_targets_mean": 4363.6, "valid_targets_min": 546 }, { "epoch": 3.384, "grad_norm": 0.5662294561036999, "learning_rate": 1.1457736173676883e-05, "loss": 0.3209, "loss_nan_ranks": 0, "loss_rank_avg": 0.12514197826385498, "step": 2115, "valid_targets_mean": 2388.6, "valid_targets_min": 974 }, { "epoch": 3.392, "grad_norm": 0.5059434791101876, "learning_rate": 1.1356852111545493e-05, "loss": 0.3481, "loss_nan_ranks": 0, "loss_rank_avg": 0.16026899218559265, "step": 2120, "valid_targets_mean": 3593.6, "valid_targets_min": 1325 }, { "epoch": 3.4, "grad_norm": 0.5844883650373187, "learning_rate": 1.1256237748786675e-05, "loss": 0.3326, "loss_nan_ranks": 0, "loss_rank_avg": 0.22520673274993896, "step": 2125, "valid_targets_mean": 3685.5, "valid_targets_min": 379 }, { "epoch": 3.408, "grad_norm": 0.5009787440262468, "learning_rate": 1.1155896224954543e-05, "loss": 0.3357, "loss_nan_ranks": 0, "loss_rank_avg": 0.12661316990852356, "step": 2130, "valid_targets_mean": 3547.5, "valid_targets_min": 816 }, { "epoch": 3.416, "grad_norm": 0.48678971220314976, "learning_rate": 1.1055830671089578e-05, "loss": 0.3183, "loss_nan_ranks": 0, "loss_rank_avg": 0.16513441503047943, "step": 2135, "valid_targets_mean": 5887.6, "valid_targets_min": 814 }, { "epoch": 3.424, "grad_norm": 0.5206971097737679, "learning_rate": 1.0956044209620966e-05, "loss": 0.3357, "loss_nan_ranks": 0, "loss_rank_avg": 0.13408246636390686, "step": 2140, "valid_targets_mean": 3378.6, "valid_targets_min": 598 }, { "epoch": 3.432, "grad_norm": 0.47966318102705424, "learning_rate": 1.0856539954269121e-05, "loss": 0.323, "loss_nan_ranks": 0, "loss_rank_avg": 0.16585497558116913, "step": 2145, "valid_targets_mean": 4378.1, "valid_targets_min": 798 }, { "epoch": 3.44, "grad_norm": 0.5122761624297089, "learning_rate": 1.0757321009948543e-05, "loss": 0.3438, "loss_nan_ranks": 0, "loss_rank_avg": 0.12634073197841644, "step": 2150, "valid_targets_mean": 3119.5, "valid_targets_min": 975 }, { "epoch": 3.448, "grad_norm": 0.6182750943275146, "learning_rate": 1.0658390472670938e-05, "loss": 0.327, "loss_nan_ranks": 0, "loss_rank_avg": 0.1473451852798462, "step": 2155, "valid_targets_mean": 2101.5, "valid_targets_min": 843 }, { "epoch": 3.456, "grad_norm": 0.4888023729480413, "learning_rate": 1.0559751429448597e-05, "loss": 0.3408, "loss_nan_ranks": 0, "loss_rank_avg": 0.21524950861930847, "step": 2160, "valid_targets_mean": 6050.9, "valid_targets_min": 634 }, { "epoch": 3.464, "grad_norm": 0.4542480312052559, "learning_rate": 1.0461406958198101e-05, "loss": 0.321, "loss_nan_ranks": 0, "loss_rank_avg": 0.12608930468559265, "step": 2165, "valid_targets_mean": 4816.9, "valid_targets_min": 879 }, { "epoch": 3.472, "grad_norm": 0.5112301560829176, "learning_rate": 1.0363360127644235e-05, "loss": 0.3444, "loss_nan_ranks": 0, "loss_rank_avg": 0.21885031461715698, "step": 2170, "valid_targets_mean": 6240.8, "valid_targets_min": 1414 }, { "epoch": 3.48, "grad_norm": 0.5442050920516187, "learning_rate": 1.0265613997224255e-05, "loss": 0.3139, "loss_nan_ranks": 0, "loss_rank_avg": 0.1261734962463379, "step": 2175, "valid_targets_mean": 3223.0, "valid_targets_min": 902 }, { "epoch": 3.488, "grad_norm": 0.4610573972043477, "learning_rate": 1.0168171616992422e-05, "loss": 0.3175, "loss_nan_ranks": 0, "loss_rank_avg": 0.19308221340179443, "step": 2180, "valid_targets_mean": 6308.8, "valid_targets_min": 823 }, { "epoch": 3.496, "grad_norm": 0.5306677534892053, "learning_rate": 1.007103602752483e-05, "loss": 0.3402, "loss_nan_ranks": 0, "loss_rank_avg": 0.1896430402994156, "step": 2185, "valid_targets_mean": 3996.8, "valid_targets_min": 671 }, { "epoch": 3.504, "grad_norm": 0.5044165875993919, "learning_rate": 9.974210259824505e-06, "loss": 0.3336, "loss_nan_ranks": 0, "loss_rank_avg": 0.16915544867515564, "step": 2190, "valid_targets_mean": 3629.6, "valid_targets_min": 1033 }, { "epoch": 3.512, "grad_norm": 0.6879187845686496, "learning_rate": 9.877697335226872e-06, "loss": 0.3439, "loss_nan_ranks": 0, "loss_rank_avg": 0.10717498511075974, "step": 2195, "valid_targets_mean": 2186.5, "valid_targets_min": 687 }, { "epoch": 3.52, "grad_norm": 0.6250839989818192, "learning_rate": 9.781500265305448e-06, "loss": 0.3357, "loss_nan_ranks": 0, "loss_rank_avg": 0.1784055531024933, "step": 2200, "valid_targets_mean": 3010.9, "valid_targets_min": 720 }, { "epoch": 3.528, "grad_norm": 0.5248629910937874, "learning_rate": 9.685622051777856e-06, "loss": 0.3092, "loss_nan_ranks": 0, "loss_rank_avg": 0.23266291618347168, "step": 2205, "valid_targets_mean": 5086.4, "valid_targets_min": 787 }, { "epoch": 3.536, "grad_norm": 0.4834179595767226, "learning_rate": 9.590065686412182e-06, "loss": 0.3233, "loss_nan_ranks": 0, "loss_rank_avg": 0.13310745358467102, "step": 2210, "valid_targets_mean": 2614.6, "valid_targets_min": 925 }, { "epoch": 3.544, "grad_norm": 0.45602768383743014, "learning_rate": 9.494834150933616e-06, "loss": 0.3297, "loss_nan_ranks": 0, "loss_rank_avg": 0.1372455358505249, "step": 2215, "valid_targets_mean": 3843.4, "valid_targets_min": 1099 }, { "epoch": 3.552, "grad_norm": 0.4887447107244825, "learning_rate": 9.399930416931404e-06, "loss": 0.314, "loss_nan_ranks": 0, "loss_rank_avg": 0.10885452479124069, "step": 2220, "valid_targets_mean": 3725.2, "valid_targets_min": 872 }, { "epoch": 3.56, "grad_norm": 0.5151465841953303, "learning_rate": 9.30535744576615e-06, "loss": 0.3353, "loss_nan_ranks": 0, "loss_rank_avg": 0.13782528042793274, "step": 2225, "valid_targets_mean": 3750.6, "valid_targets_min": 1124 }, { "epoch": 3.568, "grad_norm": 0.5623056848959576, "learning_rate": 9.211118188477362e-06, "loss": 0.3311, "loss_nan_ranks": 0, "loss_rank_avg": 0.2320508360862732, "step": 2230, "valid_targets_mean": 4508.8, "valid_targets_min": 662 }, { "epoch": 3.576, "grad_norm": 0.5178467063324498, "learning_rate": 9.117215585691408e-06, "loss": 0.3228, "loss_nan_ranks": 0, "loss_rank_avg": 0.17334342002868652, "step": 2235, "valid_targets_mean": 4636.2, "valid_targets_min": 916 }, { "epoch": 3.584, "grad_norm": 0.6120278120276946, "learning_rate": 9.023652567529744e-06, "loss": 0.3263, "loss_nan_ranks": 0, "loss_rank_avg": 0.11503614485263824, "step": 2240, "valid_targets_mean": 2788.6, "valid_targets_min": 588 }, { "epoch": 3.592, "grad_norm": 0.4937083479890421, "learning_rate": 8.930432053517465e-06, "loss": 0.3191, "loss_nan_ranks": 0, "loss_rank_avg": 0.11475160717964172, "step": 2245, "valid_targets_mean": 2826.0, "valid_targets_min": 957 }, { "epoch": 3.6, "grad_norm": 0.49253074619254694, "learning_rate": 8.837556952492264e-06, "loss": 0.3207, "loss_nan_ranks": 0, "loss_rank_avg": 0.14227665960788727, "step": 2250, "valid_targets_mean": 4518.2, "valid_targets_min": 925 }, { "epoch": 3.608, "grad_norm": 0.7363097537758285, "learning_rate": 8.745030162513582e-06, "loss": 0.3292, "loss_nan_ranks": 0, "loss_rank_avg": 0.21330423653125763, "step": 2255, "valid_targets_mean": 3404.9, "valid_targets_min": 874 }, { "epoch": 3.616, "grad_norm": 0.4414100721223384, "learning_rate": 8.652854570772236e-06, "loss": 0.3232, "loss_nan_ranks": 0, "loss_rank_avg": 0.12081056833267212, "step": 2260, "valid_targets_mean": 4392.4, "valid_targets_min": 984 }, { "epoch": 3.624, "grad_norm": 0.5606075707240097, "learning_rate": 8.561033053500312e-06, "loss": 0.3213, "loss_nan_ranks": 0, "loss_rank_avg": 0.08863774687051773, "step": 2265, "valid_targets_mean": 1930.2, "valid_targets_min": 685 }, { "epoch": 3.632, "grad_norm": 0.47128304721158626, "learning_rate": 8.46956847588141e-06, "loss": 0.3523, "loss_nan_ranks": 0, "loss_rank_avg": 0.12015935778617859, "step": 2270, "valid_targets_mean": 3377.5, "valid_targets_min": 815 }, { "epoch": 3.64, "grad_norm": 0.5125340858044971, "learning_rate": 8.378463691961237e-06, "loss": 0.2898, "loss_nan_ranks": 0, "loss_rank_avg": 0.14381183683872223, "step": 2275, "valid_targets_mean": 3659.1, "valid_targets_min": 491 }, { "epoch": 3.648, "grad_norm": 0.4592889282450072, "learning_rate": 8.287721544558574e-06, "loss": 0.3185, "loss_nan_ranks": 0, "loss_rank_avg": 0.10181337594985962, "step": 2280, "valid_targets_mean": 3632.9, "valid_targets_min": 728 }, { "epoch": 3.656, "grad_norm": 0.5965684017173326, "learning_rate": 8.197344865176548e-06, "loss": 0.3197, "loss_nan_ranks": 0, "loss_rank_avg": 0.12914949655532837, "step": 2285, "valid_targets_mean": 3181.9, "valid_targets_min": 1078 }, { "epoch": 3.664, "grad_norm": 0.6228428043708024, "learning_rate": 8.10733647391427e-06, "loss": 0.355, "loss_nan_ranks": 0, "loss_rank_avg": 0.1971237063407898, "step": 2290, "valid_targets_mean": 3335.1, "valid_targets_min": 791 }, { "epoch": 3.672, "grad_norm": 0.5979530685752266, "learning_rate": 8.017699179378849e-06, "loss": 0.3396, "loss_nan_ranks": 0, "loss_rank_avg": 0.22264941036701202, "step": 2295, "valid_targets_mean": 3038.4, "valid_targets_min": 830 }, { "epoch": 3.68, "grad_norm": 0.4458519642564752, "learning_rate": 7.928435778597763e-06, "loss": 0.3209, "loss_nan_ranks": 0, "loss_rank_avg": 0.1323288083076477, "step": 2300, "valid_targets_mean": 3766.8, "valid_targets_min": 874 }, { "epoch": 3.6879999999999997, "grad_norm": 0.5205119846178907, "learning_rate": 7.839549056931557e-06, "loss": 0.3348, "loss_nan_ranks": 0, "loss_rank_avg": 0.16188280284404755, "step": 2305, "valid_targets_mean": 3806.9, "valid_targets_min": 1001 }, { "epoch": 3.6959999999999997, "grad_norm": 0.6321612059563577, "learning_rate": 7.751041787986965e-06, "loss": 0.3147, "loss_nan_ranks": 0, "loss_rank_avg": 0.20028778910636902, "step": 2310, "valid_targets_mean": 5492.1, "valid_targets_min": 905 }, { "epoch": 3.7039999999999997, "grad_norm": 0.4856071600672378, "learning_rate": 7.662916733530317e-06, "loss": 0.2987, "loss_nan_ranks": 0, "loss_rank_avg": 0.20045016705989838, "step": 2315, "valid_targets_mean": 5160.9, "valid_targets_min": 1459 }, { "epoch": 3.7119999999999997, "grad_norm": 0.6190037743730318, "learning_rate": 7.575176643401394e-06, "loss": 0.3144, "loss_nan_ranks": 0, "loss_rank_avg": 0.19084815680980682, "step": 2320, "valid_targets_mean": 3074.1, "valid_targets_min": 740 }, { "epoch": 3.7199999999999998, "grad_norm": 0.47802774359013706, "learning_rate": 7.487824255427616e-06, "loss": 0.3462, "loss_nan_ranks": 0, "loss_rank_avg": 0.2085719108581543, "step": 2325, "valid_targets_mean": 5742.2, "valid_targets_min": 1804 }, { "epoch": 3.7279999999999998, "grad_norm": 0.6141982017666223, "learning_rate": 7.400862295338595e-06, "loss": 0.319, "loss_nan_ranks": 0, "loss_rank_avg": 0.1338297575712204, "step": 2330, "valid_targets_mean": 3026.8, "valid_targets_min": 869 }, { "epoch": 3.7359999999999998, "grad_norm": 0.4272699620447183, "learning_rate": 7.314293476681122e-06, "loss": 0.3269, "loss_nan_ranks": 0, "loss_rank_avg": 0.12799906730651855, "step": 2335, "valid_targets_mean": 5400.1, "valid_targets_min": 920 }, { "epoch": 3.7439999999999998, "grad_norm": 0.558531390187902, "learning_rate": 7.228120500734443e-06, "loss": 0.3094, "loss_nan_ranks": 0, "loss_rank_avg": 0.1416444331407547, "step": 2340, "valid_targets_mean": 3904.5, "valid_targets_min": 665 }, { "epoch": 3.752, "grad_norm": 0.5957360903717863, "learning_rate": 7.1423460564259995e-06, "loss": 0.3211, "loss_nan_ranks": 0, "loss_rank_avg": 0.1542271375656128, "step": 2345, "valid_targets_mean": 2498.1, "valid_targets_min": 725 }, { "epoch": 3.76, "grad_norm": 0.5749982706805087, "learning_rate": 7.056972820247516e-06, "loss": 0.3403, "loss_nan_ranks": 0, "loss_rank_avg": 0.24424612522125244, "step": 2350, "valid_targets_mean": 4599.0, "valid_targets_min": 586 }, { "epoch": 3.768, "grad_norm": 0.7004621373727494, "learning_rate": 6.97200345617149e-06, "loss": 0.3132, "loss_nan_ranks": 0, "loss_rank_avg": 0.261954128742218, "step": 2355, "valid_targets_mean": 3538.9, "valid_targets_min": 911 }, { "epoch": 3.776, "grad_norm": 0.4631600705700879, "learning_rate": 6.887440615568044e-06, "loss": 0.3387, "loss_nan_ranks": 0, "loss_rank_avg": 0.13582396507263184, "step": 2360, "valid_targets_mean": 4712.9, "valid_targets_min": 1222 }, { "epoch": 3.784, "grad_norm": 0.5495931240245518, "learning_rate": 6.803286937122233e-06, "loss": 0.3371, "loss_nan_ranks": 0, "loss_rank_avg": 0.23861850798130035, "step": 2365, "valid_targets_mean": 4753.6, "valid_targets_min": 980 }, { "epoch": 3.792, "grad_norm": 0.6024280451059767, "learning_rate": 6.719545046751674e-06, "loss": 0.3315, "loss_nan_ranks": 0, "loss_rank_avg": 0.21030789613723755, "step": 2370, "valid_targets_mean": 2878.9, "valid_targets_min": 895 }, { "epoch": 3.8, "grad_norm": 0.6147492284481915, "learning_rate": 6.636217557524605e-06, "loss": 0.3522, "loss_nan_ranks": 0, "loss_rank_avg": 0.161615788936615, "step": 2375, "valid_targets_mean": 2665.0, "valid_targets_min": 634 }, { "epoch": 3.808, "grad_norm": 0.5623305812954921, "learning_rate": 6.55330706957837e-06, "loss": 0.3327, "loss_nan_ranks": 0, "loss_rank_avg": 0.2057649940252304, "step": 2380, "valid_targets_mean": 4465.9, "valid_targets_min": 1127 }, { "epoch": 3.816, "grad_norm": 0.5283338376179509, "learning_rate": 6.4708161700382655e-06, "loss": 0.3246, "loss_nan_ranks": 0, "loss_rank_avg": 0.1588209867477417, "step": 2385, "valid_targets_mean": 3723.0, "valid_targets_min": 807 }, { "epoch": 3.824, "grad_norm": 0.4510470301147515, "learning_rate": 6.388747432936819e-06, "loss": 0.3195, "loss_nan_ranks": 0, "loss_rank_avg": 0.12806157767772675, "step": 2390, "valid_targets_mean": 3841.9, "valid_targets_min": 981 }, { "epoch": 3.832, "grad_norm": 0.5436971472039873, "learning_rate": 6.3071034191334915e-06, "loss": 0.3228, "loss_nan_ranks": 0, "loss_rank_avg": 0.1942981779575348, "step": 2395, "valid_targets_mean": 3798.4, "valid_targets_min": 732 }, { "epoch": 3.84, "grad_norm": 0.5018428572701472, "learning_rate": 6.22588667623472e-06, "loss": 0.3207, "loss_nan_ranks": 0, "loss_rank_avg": 0.13997101783752441, "step": 2400, "valid_targets_mean": 3812.2, "valid_targets_min": 708 }, { "epoch": 3.848, "grad_norm": 0.582055372382792, "learning_rate": 6.145099738514466e-06, "loss": 0.3218, "loss_nan_ranks": 0, "loss_rank_avg": 0.2213214933872223, "step": 2405, "valid_targets_mean": 3403.4, "valid_targets_min": 819 }, { "epoch": 3.856, "grad_norm": 0.583928362005403, "learning_rate": 6.064745126835112e-06, "loss": 0.3025, "loss_nan_ranks": 0, "loss_rank_avg": 0.16777969896793365, "step": 2410, "valid_targets_mean": 3357.1, "valid_targets_min": 860 }, { "epoch": 3.864, "grad_norm": 0.6142754993162113, "learning_rate": 5.984825348568812e-06, "loss": 0.3007, "loss_nan_ranks": 0, "loss_rank_avg": 0.19814524054527283, "step": 2415, "valid_targets_mean": 2603.5, "valid_targets_min": 831 }, { "epoch": 3.872, "grad_norm": 0.4195895918414985, "learning_rate": 5.905342897519262e-06, "loss": 0.33, "loss_nan_ranks": 0, "loss_rank_avg": 0.12075480818748474, "step": 2420, "valid_targets_mean": 4224.9, "valid_targets_min": 551 }, { "epoch": 3.88, "grad_norm": 0.6108959292753345, "learning_rate": 5.826300253843851e-06, "loss": 0.3366, "loss_nan_ranks": 0, "loss_rank_avg": 0.15773876011371613, "step": 2425, "valid_targets_mean": 2816.1, "valid_targets_min": 930 }, { "epoch": 3.888, "grad_norm": 0.5171359185903159, "learning_rate": 5.7476998839763035e-06, "loss": 0.2989, "loss_nan_ranks": 0, "loss_rank_avg": 0.13886040449142456, "step": 2430, "valid_targets_mean": 3425.9, "valid_targets_min": 789 }, { "epoch": 3.896, "grad_norm": 0.4763178741386556, "learning_rate": 5.669544240549698e-06, "loss": 0.3051, "loss_nan_ranks": 0, "loss_rank_avg": 0.16396956145763397, "step": 2435, "valid_targets_mean": 4035.2, "valid_targets_min": 893 }, { "epoch": 3.904, "grad_norm": 0.4785442559038061, "learning_rate": 5.591835762319946e-06, "loss": 0.3111, "loss_nan_ranks": 0, "loss_rank_avg": 0.11912888288497925, "step": 2440, "valid_targets_mean": 2858.9, "valid_targets_min": 566 }, { "epoch": 3.912, "grad_norm": 0.6233825072887711, "learning_rate": 5.514576874089683e-06, "loss": 0.3413, "loss_nan_ranks": 0, "loss_rank_avg": 0.1519429087638855, "step": 2445, "valid_targets_mean": 3033.6, "valid_targets_min": 559 }, { "epoch": 3.92, "grad_norm": 0.5322851898274968, "learning_rate": 5.437769986632622e-06, "loss": 0.341, "loss_nan_ranks": 0, "loss_rank_avg": 0.1855153739452362, "step": 2450, "valid_targets_mean": 4399.1, "valid_targets_min": 1451 }, { "epoch": 3.928, "grad_norm": 0.44237959812688865, "learning_rate": 5.361417496618315e-06, "loss": 0.3117, "loss_nan_ranks": 0, "loss_rank_avg": 0.18377932906150818, "step": 2455, "valid_targets_mean": 6480.9, "valid_targets_min": 1407 }, { "epoch": 3.936, "grad_norm": 0.7143654129319893, "learning_rate": 5.285521786537368e-06, "loss": 0.3282, "loss_nan_ranks": 0, "loss_rank_avg": 0.15518513321876526, "step": 2460, "valid_targets_mean": 4235.6, "valid_targets_min": 1211 }, { "epoch": 3.944, "grad_norm": 0.4769151407862085, "learning_rate": 5.2100852246270975e-06, "loss": 0.3146, "loss_nan_ranks": 0, "loss_rank_avg": 0.21277496218681335, "step": 2465, "valid_targets_mean": 5719.1, "valid_targets_min": 829 }, { "epoch": 3.952, "grad_norm": 0.6838406068253756, "learning_rate": 5.135110164797637e-06, "loss": 0.3305, "loss_nan_ranks": 0, "loss_rank_avg": 0.3378908634185791, "step": 2470, "valid_targets_mean": 3928.5, "valid_targets_min": 664 }, { "epoch": 3.96, "grad_norm": 0.6130883409393146, "learning_rate": 5.060598946558484e-06, "loss": 0.3349, "loss_nan_ranks": 0, "loss_rank_avg": 0.13703122735023499, "step": 2475, "valid_targets_mean": 2553.0, "valid_targets_min": 834 }, { "epoch": 3.968, "grad_norm": 0.4357023208751496, "learning_rate": 4.986553894945512e-06, "loss": 0.2976, "loss_nan_ranks": 0, "loss_rank_avg": 0.16759523749351501, "step": 2480, "valid_targets_mean": 4775.6, "valid_targets_min": 736 }, { "epoch": 3.976, "grad_norm": 0.638026763808748, "learning_rate": 4.912977320448391e-06, "loss": 0.3393, "loss_nan_ranks": 0, "loss_rank_avg": 0.2706812620162964, "step": 2485, "valid_targets_mean": 4390.4, "valid_targets_min": 1011 }, { "epoch": 3.984, "grad_norm": 0.6395434082611862, "learning_rate": 4.839871518938513e-06, "loss": 0.354, "loss_nan_ranks": 0, "loss_rank_avg": 0.18830779194831848, "step": 2490, "valid_targets_mean": 3367.8, "valid_targets_min": 1225 }, { "epoch": 3.992, "grad_norm": 0.6045521377506226, "learning_rate": 4.767238771597347e-06, "loss": 0.3432, "loss_nan_ranks": 0, "loss_rank_avg": 0.167062908411026, "step": 2495, "valid_targets_mean": 2106.9, "valid_targets_min": 555 }, { "epoch": 4.0, "grad_norm": 0.5970487451114057, "learning_rate": 4.695081344845254e-06, "loss": 0.3183, "loss_nan_ranks": 0, "loss_rank_avg": 0.15452514588832855, "step": 2500, "valid_targets_mean": 3056.4, "valid_targets_min": 506 }, { "epoch": 4.008, "grad_norm": 0.4416015542929956, "learning_rate": 4.623401490270778e-06, "loss": 0.3266, "loss_nan_ranks": 0, "loss_rank_avg": 0.1425788402557373, "step": 2505, "valid_targets_mean": 4244.4, "valid_targets_min": 1097 }, { "epoch": 4.016, "grad_norm": 0.6050783805568698, "learning_rate": 4.552201444560373e-06, "loss": 0.3079, "loss_nan_ranks": 0, "loss_rank_avg": 0.13076642155647278, "step": 2510, "valid_targets_mean": 3324.1, "valid_targets_min": 1050 }, { "epoch": 4.024, "grad_norm": 0.7064956382591235, "learning_rate": 4.481483429428615e-06, "loss": 0.3301, "loss_nan_ranks": 0, "loss_rank_avg": 0.13634774088859558, "step": 2515, "valid_targets_mean": 2014.1, "valid_targets_min": 940 }, { "epoch": 4.032, "grad_norm": 0.49373001353281315, "learning_rate": 4.4112496515488765e-06, "loss": 0.2945, "loss_nan_ranks": 0, "loss_rank_avg": 0.15392452478408813, "step": 2520, "valid_targets_mean": 3495.6, "valid_targets_min": 467 }, { "epoch": 4.04, "grad_norm": 0.5279529664688594, "learning_rate": 4.341502302484472e-06, "loss": 0.3109, "loss_nan_ranks": 0, "loss_rank_avg": 0.19533191621303558, "step": 2525, "valid_targets_mean": 4519.1, "valid_targets_min": 1176 }, { "epoch": 4.048, "grad_norm": 0.5002800878652447, "learning_rate": 4.272243558620264e-06, "loss": 0.2897, "loss_nan_ranks": 0, "loss_rank_avg": 0.14053988456726074, "step": 2530, "valid_targets_mean": 3120.2, "valid_targets_min": 1102 }, { "epoch": 4.056, "grad_norm": 0.6522314422022066, "learning_rate": 4.203475581094771e-06, "loss": 0.3082, "loss_nan_ranks": 0, "loss_rank_avg": 0.20415924489498138, "step": 2535, "valid_targets_mean": 3365.0, "valid_targets_min": 1171 }, { "epoch": 4.064, "grad_norm": 0.3871114184970602, "learning_rate": 4.135200515732716e-06, "loss": 0.3235, "loss_nan_ranks": 0, "loss_rank_avg": 0.13306647539138794, "step": 2540, "valid_targets_mean": 6466.8, "valid_targets_min": 1014 }, { "epoch": 4.072, "grad_norm": 0.5275110629270116, "learning_rate": 4.067420492978065e-06, "loss": 0.3177, "loss_nan_ranks": 0, "loss_rank_avg": 0.19687163829803467, "step": 2545, "valid_targets_mean": 4931.0, "valid_targets_min": 1092 }, { "epoch": 4.08, "grad_norm": 0.6130234035544103, "learning_rate": 4.000137627827554e-06, "loss": 0.3209, "loss_nan_ranks": 0, "loss_rank_avg": 0.16655772924423218, "step": 2550, "valid_targets_mean": 2530.0, "valid_targets_min": 1073 }, { "epoch": 4.088, "grad_norm": 0.38814066490279997, "learning_rate": 3.9333540197647035e-06, "loss": 0.2997, "loss_nan_ranks": 0, "loss_rank_avg": 0.16146323084831238, "step": 2555, "valid_targets_mean": 7966.8, "valid_targets_min": 1103 }, { "epoch": 4.096, "grad_norm": 0.4590471476508371, "learning_rate": 3.867071752694282e-06, "loss": 0.304, "loss_nan_ranks": 0, "loss_rank_avg": 0.1653611660003662, "step": 2560, "valid_targets_mean": 4412.4, "valid_targets_min": 602 }, { "epoch": 4.104, "grad_norm": 0.5272846889528702, "learning_rate": 3.8012928948773243e-06, "loss": 0.3173, "loss_nan_ranks": 0, "loss_rank_avg": 0.18114957213401794, "step": 2565, "valid_targets_mean": 4442.1, "valid_targets_min": 1003 }, { "epoch": 4.112, "grad_norm": 0.5186772440314499, "learning_rate": 3.7360194988665364e-06, "loss": 0.2818, "loss_nan_ranks": 0, "loss_rank_avg": 0.13564951717853546, "step": 2570, "valid_targets_mean": 3933.4, "valid_targets_min": 1004 }, { "epoch": 4.12, "grad_norm": 0.4328083139216796, "learning_rate": 3.6712536014422885e-06, "loss": 0.3033, "loss_nan_ranks": 0, "loss_rank_avg": 0.12977717816829681, "step": 2575, "valid_targets_mean": 5225.9, "valid_targets_min": 871 }, { "epoch": 4.128, "grad_norm": 0.6420338275118762, "learning_rate": 3.606997223549049e-06, "loss": 0.3142, "loss_nan_ranks": 0, "loss_rank_avg": 0.20889759063720703, "step": 2580, "valid_targets_mean": 3776.1, "valid_targets_min": 844 }, { "epoch": 4.136, "grad_norm": 0.5245741332938453, "learning_rate": 3.543252370232313e-06, "loss": 0.3056, "loss_nan_ranks": 0, "loss_rank_avg": 0.1626463085412979, "step": 2585, "valid_targets_mean": 4243.8, "valid_targets_min": 1076 }, { "epoch": 4.144, "grad_norm": 0.6009260362686207, "learning_rate": 3.4800210305760662e-06, "loss": 0.3222, "loss_nan_ranks": 0, "loss_rank_avg": 0.20430579781532288, "step": 2590, "valid_targets_mean": 4037.8, "valid_targets_min": 872 }, { "epoch": 4.152, "grad_norm": 0.6474293755424925, "learning_rate": 3.4173051776406817e-06, "loss": 0.334, "loss_nan_ranks": 0, "loss_rank_avg": 0.17541387677192688, "step": 2595, "valid_targets_mean": 3601.9, "valid_targets_min": 709 }, { "epoch": 4.16, "grad_norm": 0.5299682943737077, "learning_rate": 3.3551067684013706e-06, "loss": 0.3194, "loss_nan_ranks": 0, "loss_rank_avg": 0.17366911470890045, "step": 2600, "valid_targets_mean": 4566.0, "valid_targets_min": 797 }, { "epoch": 4.168, "grad_norm": 0.5343201098430208, "learning_rate": 3.2934277436871187e-06, "loss": 0.3082, "loss_nan_ranks": 0, "loss_rank_avg": 0.18967169523239136, "step": 2605, "valid_targets_mean": 5175.8, "valid_targets_min": 803 }, { "epoch": 4.176, "grad_norm": 0.706532685786007, "learning_rate": 3.232270028120121e-06, "loss": 0.3535, "loss_nan_ranks": 0, "loss_rank_avg": 0.22153635323047638, "step": 2610, "valid_targets_mean": 3667.5, "valid_targets_min": 1042 }, { "epoch": 4.184, "grad_norm": 0.6583861635037094, "learning_rate": 3.1716355300557256e-06, "loss": 0.3241, "loss_nan_ranks": 0, "loss_rank_avg": 0.21890048682689667, "step": 2615, "valid_targets_mean": 3347.5, "valid_targets_min": 993 }, { "epoch": 4.192, "grad_norm": 0.4972479392885182, "learning_rate": 3.111526141522896e-06, "loss": 0.2942, "loss_nan_ranks": 0, "loss_rank_avg": 0.23314082622528076, "step": 2620, "valid_targets_mean": 5941.9, "valid_targets_min": 960 }, { "epoch": 4.2, "grad_norm": 0.42587188064481973, "learning_rate": 3.0519437381651507e-06, "loss": 0.3007, "loss_nan_ranks": 0, "loss_rank_avg": 0.13505974411964417, "step": 2625, "valid_targets_mean": 4900.6, "valid_targets_min": 1129 }, { "epoch": 4.208, "grad_norm": 0.568223927683394, "learning_rate": 2.992890179182062e-06, "loss": 0.3146, "loss_nan_ranks": 0, "loss_rank_avg": 0.19689098000526428, "step": 2630, "valid_targets_mean": 4559.5, "valid_targets_min": 930 }, { "epoch": 4.216, "grad_norm": 0.3833405874743467, "learning_rate": 2.93436730727122e-06, "loss": 0.3283, "loss_nan_ranks": 0, "loss_rank_avg": 0.21802669763565063, "step": 2635, "valid_targets_mean": 13101.0, "valid_targets_min": 704 }, { "epoch": 4.224, "grad_norm": 0.4360867692750178, "learning_rate": 2.8763769485707447e-06, "loss": 0.3004, "loss_nan_ranks": 0, "loss_rank_avg": 0.1010446846485138, "step": 2640, "valid_targets_mean": 3420.9, "valid_targets_min": 694 }, { "epoch": 4.232, "grad_norm": 0.5394563271831259, "learning_rate": 2.818920912602294e-06, "loss": 0.3053, "loss_nan_ranks": 0, "loss_rank_avg": 0.08956138789653778, "step": 2645, "valid_targets_mean": 2421.5, "valid_targets_min": 799 }, { "epoch": 4.24, "grad_norm": 0.5364665203536897, "learning_rate": 2.762000992214626e-06, "loss": 0.3032, "loss_nan_ranks": 0, "loss_rank_avg": 0.13476410508155823, "step": 2650, "valid_targets_mean": 4055.1, "valid_targets_min": 751 }, { "epoch": 4.248, "grad_norm": 0.6310235572259564, "learning_rate": 2.7056189635276162e-06, "loss": 0.3064, "loss_nan_ranks": 0, "loss_rank_avg": 0.2394050657749176, "step": 2655, "valid_targets_mean": 4607.8, "valid_targets_min": 691 }, { "epoch": 4.256, "grad_norm": 0.5141156610492511, "learning_rate": 2.6497765858768643e-06, "loss": 0.3108, "loss_nan_ranks": 0, "loss_rank_avg": 0.19690141081809998, "step": 2660, "valid_targets_mean": 4163.4, "valid_targets_min": 1429 }, { "epoch": 4.264, "grad_norm": 0.60664896327067, "learning_rate": 2.594475601758786e-06, "loss": 0.3045, "loss_nan_ranks": 0, "loss_rank_avg": 0.21876487135887146, "step": 2665, "valid_targets_mean": 3978.0, "valid_targets_min": 1162 }, { "epoch": 4.272, "grad_norm": 0.586279917157946, "learning_rate": 2.539717736776237e-06, "loss": 0.3262, "loss_nan_ranks": 0, "loss_rank_avg": 0.15984392166137695, "step": 2670, "valid_targets_mean": 3679.0, "valid_targets_min": 819 }, { "epoch": 4.28, "grad_norm": 0.5679469104180861, "learning_rate": 2.4855046995846844e-06, "loss": 0.2905, "loss_nan_ranks": 0, "loss_rank_avg": 0.1231120154261589, "step": 2675, "valid_targets_mean": 2242.6, "valid_targets_min": 596 }, { "epoch": 4.288, "grad_norm": 0.5442418621257851, "learning_rate": 2.431838181838868e-06, "loss": 0.3294, "loss_nan_ranks": 0, "loss_rank_avg": 0.15020456910133362, "step": 2680, "valid_targets_mean": 4034.4, "valid_targets_min": 746 }, { "epoch": 4.296, "grad_norm": 0.6563588104632252, "learning_rate": 2.3787198581400285e-06, "loss": 0.3679, "loss_nan_ranks": 0, "loss_rank_avg": 0.19800585508346558, "step": 2685, "valid_targets_mean": 3281.4, "valid_targets_min": 716 }, { "epoch": 4.304, "grad_norm": 0.6587761603032072, "learning_rate": 2.3261513859836437e-06, "loss": 0.3175, "loss_nan_ranks": 0, "loss_rank_avg": 0.2168753743171692, "step": 2690, "valid_targets_mean": 3552.4, "valid_targets_min": 1376 }, { "epoch": 4.312, "grad_norm": 0.5427138569442952, "learning_rate": 2.27413440570772e-06, "loss": 0.2975, "loss_nan_ranks": 0, "loss_rank_avg": 0.13280799984931946, "step": 2695, "valid_targets_mean": 2905.9, "valid_targets_min": 831 }, { "epoch": 4.32, "grad_norm": 0.4228524942163614, "learning_rate": 2.222670540441596e-06, "loss": 0.3268, "loss_nan_ranks": 0, "loss_rank_avg": 0.11188645660877228, "step": 2700, "valid_targets_mean": 3543.2, "valid_targets_min": 1280 }, { "epoch": 4.328, "grad_norm": 0.4899573286523899, "learning_rate": 2.17176139605531e-06, "loss": 0.3425, "loss_nan_ranks": 0, "loss_rank_avg": 0.2020570933818817, "step": 2705, "valid_targets_mean": 5368.1, "valid_targets_min": 704 }, { "epoch": 4.336, "grad_norm": 0.507636144967628, "learning_rate": 2.121408561109466e-06, "loss": 0.3147, "loss_nan_ranks": 0, "loss_rank_avg": 0.18471331894397736, "step": 2710, "valid_targets_mean": 5824.5, "valid_targets_min": 645 }, { "epoch": 4.344, "grad_norm": 0.3901707775687487, "learning_rate": 2.071613606805696e-06, "loss": 0.2986, "loss_nan_ranks": 0, "loss_rank_avg": 0.08599360287189484, "step": 2715, "valid_targets_mean": 3438.6, "valid_targets_min": 700 }, { "epoch": 4.352, "grad_norm": 0.5742155815605969, "learning_rate": 2.0223780869376018e-06, "loss": 0.3148, "loss_nan_ranks": 0, "loss_rank_avg": 0.15944108366966248, "step": 2720, "valid_targets_mean": 3927.5, "valid_targets_min": 1011 }, { "epoch": 4.36, "grad_norm": 0.419179061441119, "learning_rate": 1.9737035378422907e-06, "loss": 0.334, "loss_nan_ranks": 0, "loss_rank_avg": 0.14531302452087402, "step": 2725, "valid_targets_mean": 5146.2, "valid_targets_min": 1062 }, { "epoch": 4.368, "grad_norm": 0.5893213103013761, "learning_rate": 1.925591478352424e-06, "loss": 0.3056, "loss_nan_ranks": 0, "loss_rank_avg": 0.16566333174705505, "step": 2730, "valid_targets_mean": 4073.5, "valid_targets_min": 786 }, { "epoch": 4.376, "grad_norm": 0.5083568367696376, "learning_rate": 1.8780434097488443e-06, "loss": 0.3219, "loss_nan_ranks": 0, "loss_rank_avg": 0.11135862022638321, "step": 2735, "valid_targets_mean": 2977.8, "valid_targets_min": 740 }, { "epoch": 4.384, "grad_norm": 0.5970456690160377, "learning_rate": 1.831060815713699e-06, "loss": 0.3217, "loss_nan_ranks": 0, "loss_rank_avg": 0.15850530564785004, "step": 2740, "valid_targets_mean": 3731.6, "valid_targets_min": 738 }, { "epoch": 4.392, "grad_norm": 0.4352437971339595, "learning_rate": 1.7846451622841643e-06, "loss": 0.3126, "loss_nan_ranks": 0, "loss_rank_avg": 0.16310745477676392, "step": 2745, "valid_targets_mean": 4859.1, "valid_targets_min": 763 }, { "epoch": 4.4, "grad_norm": 0.7220488054568065, "learning_rate": 1.7387978978066988e-06, "loss": 0.3273, "loss_nan_ranks": 0, "loss_rank_avg": 0.12236481159925461, "step": 2750, "valid_targets_mean": 2560.9, "valid_targets_min": 833 }, { "epoch": 4.408, "grad_norm": 0.4482430441589336, "learning_rate": 1.6935204528918347e-06, "loss": 0.3229, "loss_nan_ranks": 0, "loss_rank_avg": 0.15221011638641357, "step": 2755, "valid_targets_mean": 4397.8, "valid_targets_min": 879 }, { "epoch": 4.416, "grad_norm": 0.6616631272031211, "learning_rate": 1.6488142403695651e-06, "loss": 0.3323, "loss_nan_ranks": 0, "loss_rank_avg": 0.2810991704463959, "step": 2760, "valid_targets_mean": 4374.9, "valid_targets_min": 634 }, { "epoch": 4.424, "grad_norm": 0.6231637576743879, "learning_rate": 1.6046806552452254e-06, "loss": 0.3248, "loss_nan_ranks": 0, "loss_rank_avg": 0.1512073576450348, "step": 2765, "valid_targets_mean": 2162.0, "valid_targets_min": 717 }, { "epoch": 4.432, "grad_norm": 0.546121704100876, "learning_rate": 1.5611210746559868e-06, "loss": 0.3282, "loss_nan_ranks": 0, "loss_rank_avg": 0.1441936194896698, "step": 2770, "valid_targets_mean": 3542.0, "valid_targets_min": 916 }, { "epoch": 4.44, "grad_norm": 0.539850864305549, "learning_rate": 1.5181368578278744e-06, "loss": 0.3084, "loss_nan_ranks": 0, "loss_rank_avg": 0.14854803681373596, "step": 2775, "valid_targets_mean": 3258.1, "valid_targets_min": 1298 }, { "epoch": 4.448, "grad_norm": 0.5230541376311952, "learning_rate": 1.4757293460333566e-06, "loss": 0.3112, "loss_nan_ranks": 0, "loss_rank_avg": 0.14559073746204376, "step": 2780, "valid_targets_mean": 3888.8, "valid_targets_min": 896 }, { "epoch": 4.456, "grad_norm": 0.49941833844922867, "learning_rate": 1.4338998625494905e-06, "loss": 0.3233, "loss_nan_ranks": 0, "loss_rank_avg": 0.15301160514354706, "step": 2785, "valid_targets_mean": 4101.0, "valid_targets_min": 871 }, { "epoch": 4.464, "grad_norm": 0.5879287108121554, "learning_rate": 1.3926497126166405e-06, "loss": 0.3202, "loss_nan_ranks": 0, "loss_rank_avg": 0.16979511082172394, "step": 2790, "valid_targets_mean": 4361.5, "valid_targets_min": 843 }, { "epoch": 4.4719999999999995, "grad_norm": 0.491233445234694, "learning_rate": 1.3519801833977298e-06, "loss": 0.3089, "loss_nan_ranks": 0, "loss_rank_avg": 0.16678255796432495, "step": 2795, "valid_targets_mean": 4108.4, "valid_targets_min": 957 }, { "epoch": 4.48, "grad_norm": 0.4766886110365394, "learning_rate": 1.3118925439381003e-06, "loss": 0.2904, "loss_nan_ranks": 0, "loss_rank_avg": 0.12528201937675476, "step": 2800, "valid_targets_mean": 3777.0, "valid_targets_min": 1264 }, { "epoch": 4.4879999999999995, "grad_norm": 0.47462065492059774, "learning_rate": 1.2723880451258918e-06, "loss": 0.3051, "loss_nan_ranks": 0, "loss_rank_avg": 0.17402642965316772, "step": 2805, "valid_targets_mean": 6122.8, "valid_targets_min": 3206 }, { "epoch": 4.496, "grad_norm": 0.4937259588561042, "learning_rate": 1.2334679196530219e-06, "loss": 0.3576, "loss_nan_ranks": 0, "loss_rank_avg": 0.16844210028648376, "step": 2810, "valid_targets_mean": 5449.5, "valid_targets_min": 887 }, { "epoch": 4.504, "grad_norm": 0.5195655183538852, "learning_rate": 1.1951333819767163e-06, "loss": 0.3071, "loss_nan_ranks": 0, "loss_rank_avg": 0.1690118908882141, "step": 2815, "valid_targets_mean": 4369.6, "valid_targets_min": 721 }, { "epoch": 4.5120000000000005, "grad_norm": 0.645008699848017, "learning_rate": 1.157385628281622e-06, "loss": 0.3122, "loss_nan_ranks": 0, "loss_rank_avg": 0.16069874167442322, "step": 2820, "valid_targets_mean": 2885.9, "valid_targets_min": 889 }, { "epoch": 4.52, "grad_norm": 0.46142203647435265, "learning_rate": 1.1202258364424633e-06, "loss": 0.281, "loss_nan_ranks": 0, "loss_rank_avg": 0.10793192684650421, "step": 2825, "valid_targets_mean": 4069.0, "valid_targets_min": 757 }, { "epoch": 4.5280000000000005, "grad_norm": 0.5591910759629778, "learning_rate": 1.0836551659873073e-06, "loss": 0.312, "loss_nan_ranks": 0, "loss_rank_avg": 0.1380782425403595, "step": 2830, "valid_targets_mean": 3617.9, "valid_targets_min": 661 }, { "epoch": 4.536, "grad_norm": 0.5024384662282567, "learning_rate": 1.0476747580613723e-06, "loss": 0.3243, "loss_nan_ranks": 0, "loss_rank_avg": 0.14395056664943695, "step": 2835, "valid_targets_mean": 4670.5, "valid_targets_min": 895 }, { "epoch": 4.5440000000000005, "grad_norm": 0.4960916051732461, "learning_rate": 1.012285735391416e-06, "loss": 0.3108, "loss_nan_ranks": 0, "loss_rank_avg": 0.1764678955078125, "step": 2840, "valid_targets_mean": 4985.9, "valid_targets_min": 404 }, { "epoch": 4.552, "grad_norm": 0.5442146212646635, "learning_rate": 9.774892022507166e-07, "loss": 0.319, "loss_nan_ranks": 0, "loss_rank_avg": 0.13719762861728668, "step": 2845, "valid_targets_mean": 3678.2, "valid_targets_min": 1021 }, { "epoch": 4.5600000000000005, "grad_norm": 0.5874551803498684, "learning_rate": 9.432862444245994e-07, "loss": 0.3123, "loss_nan_ranks": 0, "loss_rank_avg": 0.1982530653476715, "step": 2850, "valid_targets_mean": 3730.6, "valid_targets_min": 800 }, { "epoch": 4.568, "grad_norm": 0.5392307118781249, "learning_rate": 9.096779291765667e-07, "loss": 0.3304, "loss_nan_ranks": 0, "loss_rank_avg": 0.21671076118946075, "step": 2855, "valid_targets_mean": 5178.0, "valid_targets_min": 1265 }, { "epoch": 4.576, "grad_norm": 0.6120562795608593, "learning_rate": 8.766653052149831e-07, "loss": 0.3354, "loss_nan_ranks": 0, "loss_rank_avg": 0.20868843793869019, "step": 2860, "valid_targets_mean": 3738.2, "valid_targets_min": 1599 }, { "epoch": 4.584, "grad_norm": 0.7117821394769693, "learning_rate": 8.442494026603709e-07, "loss": 0.3398, "loss_nan_ranks": 0, "loss_rank_avg": 0.16895297169685364, "step": 2865, "valid_targets_mean": 2438.6, "valid_targets_min": 943 }, { "epoch": 4.592, "grad_norm": 0.6622067516789324, "learning_rate": 8.124312330132423e-07, "loss": 0.3267, "loss_nan_ranks": 0, "loss_rank_avg": 0.11896079778671265, "step": 2870, "valid_targets_mean": 2087.6, "valid_targets_min": 868 }, { "epoch": 4.6, "grad_norm": 0.5844121637950128, "learning_rate": 7.812117891225667e-07, "loss": 0.2815, "loss_nan_ranks": 0, "loss_rank_avg": 0.15796557068824768, "step": 2875, "valid_targets_mean": 3266.6, "valid_targets_min": 1136 }, { "epoch": 4.608, "grad_norm": 0.6396259105660784, "learning_rate": 7.505920451547544e-07, "loss": 0.2991, "loss_nan_ranks": 0, "loss_rank_avg": 0.193174809217453, "step": 2880, "valid_targets_mean": 4270.2, "valid_targets_min": 972 }, { "epoch": 4.616, "grad_norm": 0.45054830264551554, "learning_rate": 7.205729565632947e-07, "loss": 0.3064, "loss_nan_ranks": 0, "loss_rank_avg": 0.11880641430616379, "step": 2885, "valid_targets_mean": 2952.6, "valid_targets_min": 1052 }, { "epoch": 4.624, "grad_norm": 0.5867839344286957, "learning_rate": 6.911554600589121e-07, "loss": 0.3213, "loss_nan_ranks": 0, "loss_rank_avg": 0.1453874111175537, "step": 2890, "valid_targets_mean": 3303.1, "valid_targets_min": 944 }, { "epoch": 4.632, "grad_norm": 0.66861340671234, "learning_rate": 6.62340473580354e-07, "loss": 0.3191, "loss_nan_ranks": 0, "loss_rank_avg": 0.14077623188495636, "step": 2895, "valid_targets_mean": 2540.4, "valid_targets_min": 1033 }, { "epoch": 4.64, "grad_norm": 0.5948561509092989, "learning_rate": 6.341288962657422e-07, "loss": 0.3266, "loss_nan_ranks": 0, "loss_rank_avg": 0.11293820291757584, "step": 2900, "valid_targets_mean": 2276.4, "valid_targets_min": 1000 }, { "epoch": 4.648, "grad_norm": 0.4492558783161307, "learning_rate": 6.06521608424524e-07, "loss": 0.3436, "loss_nan_ranks": 0, "loss_rank_avg": 0.11917877197265625, "step": 2905, "valid_targets_mean": 2935.6, "valid_targets_min": 697 }, { "epoch": 4.656, "grad_norm": 0.5260049148715912, "learning_rate": 5.795194715099905e-07, "loss": 0.3103, "loss_nan_ranks": 0, "loss_rank_avg": 0.11105750501155853, "step": 2910, "valid_targets_mean": 2840.0, "valid_targets_min": 847 }, { "epoch": 4.664, "grad_norm": 0.6187219070281177, "learning_rate": 5.531233280924042e-07, "loss": 0.3064, "loss_nan_ranks": 0, "loss_rank_avg": 0.18402305245399475, "step": 2915, "valid_targets_mean": 3376.0, "valid_targets_min": 899 }, { "epoch": 4.672, "grad_norm": 0.6718889715232024, "learning_rate": 5.273340018327044e-07, "loss": 0.3303, "loss_nan_ranks": 0, "loss_rank_avg": 0.19413933157920837, "step": 2920, "valid_targets_mean": 2698.1, "valid_targets_min": 727 }, { "epoch": 4.68, "grad_norm": 0.6687495999234568, "learning_rate": 5.02152297456806e-07, "loss": 0.3323, "loss_nan_ranks": 0, "loss_rank_avg": 0.19689956307411194, "step": 2925, "valid_targets_mean": 2559.8, "valid_targets_min": 772 }, { "epoch": 4.688, "grad_norm": 0.5256698425754467, "learning_rate": 4.775790007304993e-07, "loss": 0.3013, "loss_nan_ranks": 0, "loss_rank_avg": 0.24560466408729553, "step": 2930, "valid_targets_mean": 6107.5, "valid_targets_min": 972 }, { "epoch": 4.696, "grad_norm": 0.5425974097527478, "learning_rate": 4.5361487843490924e-07, "loss": 0.293, "loss_nan_ranks": 0, "loss_rank_avg": 0.14793461561203003, "step": 2935, "valid_targets_mean": 3443.0, "valid_targets_min": 678 }, { "epoch": 4.704, "grad_norm": 0.4596549583922256, "learning_rate": 4.3026067834258667e-07, "loss": 0.299, "loss_nan_ranks": 0, "loss_rank_avg": 0.16168315708637238, "step": 2940, "valid_targets_mean": 7192.1, "valid_targets_min": 1895 }, { "epoch": 4.712, "grad_norm": 0.52608460848762, "learning_rate": 4.0751712919417484e-07, "loss": 0.3069, "loss_nan_ranks": 0, "loss_rank_avg": 0.20969341695308685, "step": 2945, "valid_targets_mean": 4731.2, "valid_targets_min": 736 }, { "epoch": 4.72, "grad_norm": 0.5530590908411556, "learning_rate": 3.853849406756549e-07, "loss": 0.2896, "loss_nan_ranks": 0, "loss_rank_avg": 0.1489768624305725, "step": 2950, "valid_targets_mean": 3561.1, "valid_targets_min": 1047 }, { "epoch": 4.728, "grad_norm": 0.6253292073895866, "learning_rate": 3.6386480339621886e-07, "loss": 0.3121, "loss_nan_ranks": 0, "loss_rank_avg": 0.15937970578670502, "step": 2955, "valid_targets_mean": 3324.0, "valid_targets_min": 1209 }, { "epoch": 4.736, "grad_norm": 0.4383199488774124, "learning_rate": 3.4295738886670925e-07, "loss": 0.2963, "loss_nan_ranks": 0, "loss_rank_avg": 0.12093428522348404, "step": 2960, "valid_targets_mean": 4741.6, "valid_targets_min": 901 }, { "epoch": 4.744, "grad_norm": 0.5811119572995426, "learning_rate": 3.226633494786668e-07, "loss": 0.3017, "loss_nan_ranks": 0, "loss_rank_avg": 0.11559713631868362, "step": 2965, "valid_targets_mean": 4799.9, "valid_targets_min": 775 }, { "epoch": 4.752, "grad_norm": 0.702215418406445, "learning_rate": 3.0298331848398033e-07, "loss": 0.3151, "loss_nan_ranks": 0, "loss_rank_avg": 0.1337815821170807, "step": 2970, "valid_targets_mean": 2866.6, "valid_targets_min": 865 }, { "epoch": 4.76, "grad_norm": 0.42111483300594843, "learning_rate": 2.839179099751133e-07, "loss": 0.3159, "loss_nan_ranks": 0, "loss_rank_avg": 0.09750033915042877, "step": 2975, "valid_targets_mean": 3362.2, "valid_targets_min": 909 }, { "epoch": 4.768, "grad_norm": 0.6679073549967152, "learning_rate": 2.654677188659549e-07, "loss": 0.2942, "loss_nan_ranks": 0, "loss_rank_avg": 0.24244052171707153, "step": 2980, "valid_targets_mean": 4093.0, "valid_targets_min": 734 }, { "epoch": 4.776, "grad_norm": 0.5819245393210193, "learning_rate": 2.476333208732462e-07, "loss": 0.3427, "loss_nan_ranks": 0, "loss_rank_avg": 0.19281116127967834, "step": 2985, "valid_targets_mean": 3587.6, "valid_targets_min": 808 }, { "epoch": 4.784, "grad_norm": 0.6047753681807744, "learning_rate": 2.3041527249863193e-07, "loss": 0.3198, "loss_nan_ranks": 0, "loss_rank_avg": 0.18199463188648224, "step": 2990, "valid_targets_mean": 4387.0, "valid_targets_min": 764 }, { "epoch": 4.792, "grad_norm": 0.6613784930971137, "learning_rate": 2.1381411101127013e-07, "loss": 0.2938, "loss_nan_ranks": 0, "loss_rank_avg": 0.132035493850708, "step": 2995, "valid_targets_mean": 2644.2, "valid_targets_min": 687 }, { "epoch": 4.8, "grad_norm": 0.5682147375927875, "learning_rate": 1.9783035443108999e-07, "loss": 0.2967, "loss_nan_ranks": 0, "loss_rank_avg": 0.1099662259221077, "step": 3000, "valid_targets_mean": 2433.6, "valid_targets_min": 957 }, { "epoch": 4.808, "grad_norm": 0.4089149079258835, "learning_rate": 1.8246450151261362e-07, "loss": 0.3093, "loss_nan_ranks": 0, "loss_rank_avg": 0.1526375263929367, "step": 3005, "valid_targets_mean": 7613.1, "valid_targets_min": 791 }, { "epoch": 4.816, "grad_norm": 0.55810969874875, "learning_rate": 1.6771703172940635e-07, "loss": 0.3096, "loss_nan_ranks": 0, "loss_rank_avg": 0.1753598004579544, "step": 3010, "valid_targets_mean": 4165.6, "valid_targets_min": 794 }, { "epoch": 4.824, "grad_norm": 0.45869761806907694, "learning_rate": 1.5358840525909967e-07, "loss": 0.3151, "loss_nan_ranks": 0, "loss_rank_avg": 0.12356032431125641, "step": 3015, "valid_targets_mean": 4902.8, "valid_targets_min": 934 }, { "epoch": 4.832, "grad_norm": 0.5650404498859692, "learning_rate": 1.4007906296904072e-07, "loss": 0.3171, "loss_nan_ranks": 0, "loss_rank_avg": 0.13098034262657166, "step": 3020, "valid_targets_mean": 3383.9, "valid_targets_min": 854 }, { "epoch": 4.84, "grad_norm": 0.5184302552337475, "learning_rate": 1.2718942640254084e-07, "loss": 0.3097, "loss_nan_ranks": 0, "loss_rank_avg": 0.16631071269512177, "step": 3025, "valid_targets_mean": 3698.4, "valid_targets_min": 968 }, { "epoch": 4.848, "grad_norm": 0.479932423772461, "learning_rate": 1.1491989776570623e-07, "loss": 0.3022, "loss_nan_ranks": 0, "loss_rank_avg": 0.15011048316955566, "step": 3030, "valid_targets_mean": 4314.4, "valid_targets_min": 1056 }, { "epoch": 4.856, "grad_norm": 0.5789239779860111, "learning_rate": 1.0327085991490127e-07, "loss": 0.3391, "loss_nan_ranks": 0, "loss_rank_avg": 0.11871884763240814, "step": 3035, "valid_targets_mean": 2391.0, "valid_targets_min": 710 }, { "epoch": 4.864, "grad_norm": 0.5899999965939208, "learning_rate": 9.22426763447981e-08, "loss": 0.3145, "loss_nan_ranks": 0, "loss_rank_avg": 0.13772061467170715, "step": 3040, "valid_targets_mean": 2684.2, "valid_targets_min": 844 }, { "epoch": 4.872, "grad_norm": 0.5413636294872867, "learning_rate": 8.183569117703461e-08, "loss": 0.3173, "loss_nan_ranks": 0, "loss_rank_avg": 0.1497182846069336, "step": 3045, "valid_targets_mean": 3171.4, "valid_targets_min": 1127 }, { "epoch": 4.88, "grad_norm": 0.5102986675669349, "learning_rate": 7.205022914946957e-08, "loss": 0.3454, "loss_nan_ranks": 0, "loss_rank_avg": 0.20103488862514496, "step": 3050, "valid_targets_mean": 5160.4, "valid_targets_min": 936 }, { "epoch": 4.888, "grad_norm": 0.5687408463816219, "learning_rate": 6.288659560606203e-08, "loss": 0.3219, "loss_nan_ranks": 0, "loss_rank_avg": 0.1383986473083496, "step": 3055, "valid_targets_mean": 3382.1, "valid_targets_min": 1005 }, { "epoch": 4.896, "grad_norm": 0.4996884419003454, "learning_rate": 5.4345076487332114e-08, "loss": 0.3192, "loss_nan_ranks": 0, "loss_rank_avg": 0.18319477140903473, "step": 3060, "valid_targets_mean": 4995.0, "valid_targets_min": 1134 }, { "epoch": 4.904, "grad_norm": 0.465356601725948, "learning_rate": 4.642593832144382e-08, "loss": 0.2856, "loss_nan_ranks": 0, "loss_rank_avg": 0.1306002140045166, "step": 3065, "valid_targets_mean": 4357.2, "valid_targets_min": 667 }, { "epoch": 4.912, "grad_norm": 0.6879235684017253, "learning_rate": 3.912942821589161e-08, "loss": 0.2827, "loss_nan_ranks": 0, "loss_rank_avg": 0.1553347408771515, "step": 3070, "valid_targets_mean": 5430.6, "valid_targets_min": 2537 }, { "epoch": 4.92, "grad_norm": 0.6393549752658132, "learning_rate": 3.2455773849779935e-08, "loss": 0.3238, "loss_nan_ranks": 0, "loss_rank_avg": 0.21189448237419128, "step": 3075, "valid_targets_mean": 3572.1, "valid_targets_min": 1467 }, { "epoch": 4.928, "grad_norm": 0.4888826335889139, "learning_rate": 2.6405183466731154e-08, "loss": 0.3143, "loss_nan_ranks": 0, "loss_rank_avg": 0.11773912608623505, "step": 3080, "valid_targets_mean": 3744.6, "valid_targets_min": 1223 }, { "epoch": 4.936, "grad_norm": 0.5646847598387272, "learning_rate": 2.0977845868375145e-08, "loss": 0.2988, "loss_nan_ranks": 0, "loss_rank_avg": 0.1856897473335266, "step": 3085, "valid_targets_mean": 3780.4, "valid_targets_min": 613 }, { "epoch": 4.944, "grad_norm": 0.4228284310980491, "learning_rate": 1.6173930408467376e-08, "loss": 0.3228, "loss_nan_ranks": 0, "loss_rank_avg": 0.14991110563278198, "step": 3090, "valid_targets_mean": 5997.0, "valid_targets_min": 817 }, { "epoch": 4.952, "grad_norm": 0.46279632136180054, "learning_rate": 1.199358698759978e-08, "loss": 0.2987, "loss_nan_ranks": 0, "loss_rank_avg": 0.15612635016441345, "step": 3095, "valid_targets_mean": 5848.9, "valid_targets_min": 800 }, { "epoch": 4.96, "grad_norm": 0.5214714566840865, "learning_rate": 8.436946048522298e-09, "loss": 0.3246, "loss_nan_ranks": 0, "loss_rank_avg": 0.20194128155708313, "step": 3100, "valid_targets_mean": 4828.2, "valid_targets_min": 3631 }, { "epoch": 4.968, "grad_norm": 0.47269884538929174, "learning_rate": 5.504118572081662e-09, "loss": 0.3072, "loss_nan_ranks": 0, "loss_rank_avg": 0.1507304161787033, "step": 3105, "valid_targets_mean": 4550.4, "valid_targets_min": 779 }, { "epoch": 4.976, "grad_norm": 0.48876143808029926, "learning_rate": 3.1951960737419686e-09, "loss": 0.305, "loss_nan_ranks": 0, "loss_rank_avg": 0.2046237587928772, "step": 3110, "valid_targets_mean": 6304.9, "valid_targets_min": 1809 }, { "epoch": 4.984, "grad_norm": 0.5173224903959538, "learning_rate": 1.5102506007447227e-09, "loss": 0.3073, "loss_nan_ranks": 0, "loss_rank_avg": 0.11847519129514694, "step": 3115, "valid_targets_mean": 2805.1, "valid_targets_min": 1730 }, { "epoch": 4.992, "grad_norm": 0.5730468668213601, "learning_rate": 4.493347298528683e-10, "loss": 0.3402, "loss_nan_ranks": 0, "loss_rank_avg": 0.1377507597208023, "step": 3120, "valid_targets_mean": 3388.4, "valid_targets_min": 1106 }, { "epoch": 5.0, "grad_norm": 0.5459769103106156, "learning_rate": 1.248156571209691e-11, "loss": 0.296, "loss_nan_ranks": 0, "loss_rank_avg": 0.1909605711698532, "step": 3125, "valid_targets_mean": 3954.1, "valid_targets_min": 1068 }, { "epoch": 5.0, "loss_nan_ranks": 0, "loss_rank_avg": 0.1909605711698532, "step": 3125, "total_flos": 7.266001898207969e+17, "train_loss": 0.3592725233459473, "train_runtime": 30483.9243, "train_samples_per_second": 1.64, "train_steps_per_second": 0.103, "valid_targets_mean": 3954.1, "valid_targets_min": 1068 } ], "logging_steps": 5, "max_steps": 3125, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.266001898207969e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }