{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 3125, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008, "grad_norm": 8.019209505645138, "learning_rate": 5.111821086261981e-07, "loss": 0.9704, "loss_nan_ranks": 0, "loss_rank_avg": 0.9074519872665405, "step": 5, "valid_targets_mean": 4634.1, "valid_targets_min": 769 }, { "epoch": 0.016, "grad_norm": 8.056913572069162, "learning_rate": 1.1501597444089457e-06, "loss": 0.9831, "loss_nan_ranks": 0, "loss_rank_avg": 0.9801443815231323, "step": 10, "valid_targets_mean": 3432.8, "valid_targets_min": 888 }, { "epoch": 0.024, "grad_norm": 5.890567315411599, "learning_rate": 1.7891373801916933e-06, "loss": 0.9216, "loss_nan_ranks": 0, "loss_rank_avg": 0.9462229609489441, "step": 15, "valid_targets_mean": 3846.5, "valid_targets_min": 622 }, { "epoch": 0.032, "grad_norm": 4.815413753270494, "learning_rate": 2.428115015974441e-06, "loss": 0.9003, "loss_nan_ranks": 0, "loss_rank_avg": 0.9328570365905762, "step": 20, "valid_targets_mean": 2826.7, "valid_targets_min": 592 }, { "epoch": 0.04, "grad_norm": 3.2132576974472262, "learning_rate": 3.0670926517571885e-06, "loss": 0.9021, "loss_nan_ranks": 0, "loss_rank_avg": 0.9593732357025146, "step": 25, "valid_targets_mean": 5006.8, "valid_targets_min": 602 }, { "epoch": 0.048, "grad_norm": 2.1277286968257494, "learning_rate": 3.7060702875399364e-06, "loss": 0.8146, "loss_nan_ranks": 0, "loss_rank_avg": 0.8108715415000916, "step": 30, "valid_targets_mean": 4988.4, "valid_targets_min": 764 }, { "epoch": 0.056, "grad_norm": 1.8316025070705764, "learning_rate": 4.345047923322684e-06, "loss": 0.8371, "loss_nan_ranks": 0, "loss_rank_avg": 0.8360515832901001, "step": 35, "valid_targets_mean": 2703.9, "valid_targets_min": 958 }, { "epoch": 0.064, "grad_norm": 0.974656430471622, "learning_rate": 4.984025559105431e-06, "loss": 0.8143, "loss_nan_ranks": 0, "loss_rank_avg": 0.738986611366272, "step": 40, "valid_targets_mean": 5619.8, "valid_targets_min": 496 }, { "epoch": 0.072, "grad_norm": 1.2003528549510873, "learning_rate": 5.623003194888179e-06, "loss": 0.8001, "loss_nan_ranks": 0, "loss_rank_avg": 0.834333598613739, "step": 45, "valid_targets_mean": 2614.9, "valid_targets_min": 534 }, { "epoch": 0.08, "grad_norm": 0.9375989258322925, "learning_rate": 6.261980830670928e-06, "loss": 0.7226, "loss_nan_ranks": 0, "loss_rank_avg": 0.7906558513641357, "step": 50, "valid_targets_mean": 3902.1, "valid_targets_min": 678 }, { "epoch": 0.088, "grad_norm": 0.947503985003781, "learning_rate": 6.900958466453675e-06, "loss": 0.738, "loss_nan_ranks": 0, "loss_rank_avg": 0.7608993053436279, "step": 55, "valid_targets_mean": 2622.6, "valid_targets_min": 640 }, { "epoch": 0.096, "grad_norm": 1.062618699465369, "learning_rate": 7.5399361022364225e-06, "loss": 0.7835, "loss_nan_ranks": 0, "loss_rank_avg": 0.8355079293251038, "step": 60, "valid_targets_mean": 2038.3, "valid_targets_min": 616 }, { "epoch": 0.104, "grad_norm": 0.6519054791996576, "learning_rate": 8.17891373801917e-06, "loss": 0.7377, "loss_nan_ranks": 0, "loss_rank_avg": 0.6525523662567139, "step": 65, "valid_targets_mean": 4931.8, "valid_targets_min": 879 }, { "epoch": 0.112, "grad_norm": 0.8181807803276753, "learning_rate": 8.817891373801917e-06, "loss": 0.7468, "loss_nan_ranks": 0, "loss_rank_avg": 0.7478166818618774, "step": 70, "valid_targets_mean": 2875.1, "valid_targets_min": 684 }, { "epoch": 0.12, "grad_norm": 0.8077200310163509, "learning_rate": 9.456869009584665e-06, "loss": 0.7, "loss_nan_ranks": 0, "loss_rank_avg": 0.7476080656051636, "step": 75, "valid_targets_mean": 2929.1, "valid_targets_min": 918 }, { "epoch": 0.128, "grad_norm": 0.6645101628172224, "learning_rate": 1.0095846645367413e-05, "loss": 0.6873, "loss_nan_ranks": 0, "loss_rank_avg": 0.7070491313934326, "step": 80, "valid_targets_mean": 3938.2, "valid_targets_min": 1116 }, { "epoch": 0.136, "grad_norm": 0.5668806150237382, "learning_rate": 1.073482428115016e-05, "loss": 0.6795, "loss_nan_ranks": 0, "loss_rank_avg": 0.6539013385772705, "step": 85, "valid_targets_mean": 4791.0, "valid_targets_min": 575 }, { "epoch": 0.144, "grad_norm": 0.631829978989927, "learning_rate": 1.1373801916932907e-05, "loss": 0.6727, "loss_nan_ranks": 0, "loss_rank_avg": 0.6462942957878113, "step": 90, "valid_targets_mean": 4125.3, "valid_targets_min": 746 }, { "epoch": 0.152, "grad_norm": 0.7522145077585198, "learning_rate": 1.2012779552715656e-05, "loss": 0.6913, "loss_nan_ranks": 0, "loss_rank_avg": 0.6524339914321899, "step": 95, "valid_targets_mean": 2768.8, "valid_targets_min": 730 }, { "epoch": 0.16, "grad_norm": 0.6436788258939267, "learning_rate": 1.2651757188498404e-05, "loss": 0.6746, "loss_nan_ranks": 0, "loss_rank_avg": 0.6656085848808289, "step": 100, "valid_targets_mean": 4070.2, "valid_targets_min": 693 }, { "epoch": 0.168, "grad_norm": 0.6829360551307666, "learning_rate": 1.329073482428115e-05, "loss": 0.6581, "loss_nan_ranks": 0, "loss_rank_avg": 0.696590006351471, "step": 105, "valid_targets_mean": 3549.9, "valid_targets_min": 985 }, { "epoch": 0.176, "grad_norm": 0.6518448907533483, "learning_rate": 1.39297124600639e-05, "loss": 0.6693, "loss_nan_ranks": 0, "loss_rank_avg": 0.6982806921005249, "step": 110, "valid_targets_mean": 3914.5, "valid_targets_min": 923 }, { "epoch": 0.184, "grad_norm": 0.7108542582881471, "learning_rate": 1.4568690095846648e-05, "loss": 0.6437, "loss_nan_ranks": 0, "loss_rank_avg": 0.6319302916526794, "step": 115, "valid_targets_mean": 3878.2, "valid_targets_min": 667 }, { "epoch": 0.192, "grad_norm": 0.6111508324238974, "learning_rate": 1.5207667731629394e-05, "loss": 0.6237, "loss_nan_ranks": 0, "loss_rank_avg": 0.6298456788063049, "step": 120, "valid_targets_mean": 4183.7, "valid_targets_min": 831 }, { "epoch": 0.2, "grad_norm": 0.636766820540127, "learning_rate": 1.584664536741214e-05, "loss": 0.6248, "loss_nan_ranks": 0, "loss_rank_avg": 0.6381227970123291, "step": 125, "valid_targets_mean": 4150.9, "valid_targets_min": 279 }, { "epoch": 0.208, "grad_norm": 0.7297687830940379, "learning_rate": 1.648562300319489e-05, "loss": 0.6313, "loss_nan_ranks": 0, "loss_rank_avg": 0.6899981498718262, "step": 130, "valid_targets_mean": 3481.6, "valid_targets_min": 879 }, { "epoch": 0.216, "grad_norm": 0.6994014106747448, "learning_rate": 1.712460063897764e-05, "loss": 0.6099, "loss_nan_ranks": 0, "loss_rank_avg": 0.641595184803009, "step": 135, "valid_targets_mean": 4528.6, "valid_targets_min": 788 }, { "epoch": 0.224, "grad_norm": 0.682372825217958, "learning_rate": 1.7763578274760385e-05, "loss": 0.6255, "loss_nan_ranks": 0, "loss_rank_avg": 0.6313050985336304, "step": 140, "valid_targets_mean": 4437.3, "valid_targets_min": 572 }, { "epoch": 0.232, "grad_norm": 0.6870311865242572, "learning_rate": 1.840255591054313e-05, "loss": 0.6237, "loss_nan_ranks": 0, "loss_rank_avg": 0.5983741283416748, "step": 145, "valid_targets_mean": 3244.9, "valid_targets_min": 745 }, { "epoch": 0.24, "grad_norm": 0.7883183108248433, "learning_rate": 1.904153354632588e-05, "loss": 0.6184, "loss_nan_ranks": 0, "loss_rank_avg": 0.6510107517242432, "step": 150, "valid_targets_mean": 3022.1, "valid_targets_min": 687 }, { "epoch": 0.248, "grad_norm": 0.6559928431572688, "learning_rate": 1.9680511182108627e-05, "loss": 0.6168, "loss_nan_ranks": 0, "loss_rank_avg": 0.6277835369110107, "step": 155, "valid_targets_mean": 4691.1, "valid_targets_min": 774 }, { "epoch": 0.256, "grad_norm": 0.7040494425503686, "learning_rate": 2.0319488817891376e-05, "loss": 0.6178, "loss_nan_ranks": 0, "loss_rank_avg": 0.5613512992858887, "step": 160, "valid_targets_mean": 3659.9, "valid_targets_min": 691 }, { "epoch": 0.264, "grad_norm": 0.7293885471955911, "learning_rate": 2.0958466453674126e-05, "loss": 0.578, "loss_nan_ranks": 0, "loss_rank_avg": 0.546758234500885, "step": 165, "valid_targets_mean": 3741.1, "valid_targets_min": 651 }, { "epoch": 0.272, "grad_norm": 0.772438514428517, "learning_rate": 2.1597444089456872e-05, "loss": 0.5705, "loss_nan_ranks": 0, "loss_rank_avg": 0.5695148706436157, "step": 170, "valid_targets_mean": 3372.0, "valid_targets_min": 744 }, { "epoch": 0.28, "grad_norm": 0.8184517511660472, "learning_rate": 2.2236421725239618e-05, "loss": 0.6274, "loss_nan_ranks": 0, "loss_rank_avg": 0.6200259923934937, "step": 175, "valid_targets_mean": 2952.8, "valid_targets_min": 919 }, { "epoch": 0.288, "grad_norm": 0.6401835135693486, "learning_rate": 2.2875399361022364e-05, "loss": 0.5871, "loss_nan_ranks": 0, "loss_rank_avg": 0.6663122773170471, "step": 180, "valid_targets_mean": 4482.8, "valid_targets_min": 785 }, { "epoch": 0.296, "grad_norm": 0.562579817398789, "learning_rate": 2.3514376996805114e-05, "loss": 0.594, "loss_nan_ranks": 0, "loss_rank_avg": 0.5693539977073669, "step": 185, "valid_targets_mean": 5274.5, "valid_targets_min": 669 }, { "epoch": 0.304, "grad_norm": 3.023582033179472, "learning_rate": 2.415335463258786e-05, "loss": 0.5726, "loss_nan_ranks": 0, "loss_rank_avg": 0.6133388876914978, "step": 190, "valid_targets_mean": 3461.5, "valid_targets_min": 534 }, { "epoch": 0.312, "grad_norm": 0.63776000505745, "learning_rate": 2.4792332268370606e-05, "loss": 0.6095, "loss_nan_ranks": 0, "loss_rank_avg": 0.5332133173942566, "step": 195, "valid_targets_mean": 4770.1, "valid_targets_min": 613 }, { "epoch": 0.32, "grad_norm": 0.6537457497646995, "learning_rate": 2.543130990415336e-05, "loss": 0.5961, "loss_nan_ranks": 0, "loss_rank_avg": 0.6032932996749878, "step": 200, "valid_targets_mean": 4235.5, "valid_targets_min": 418 }, { "epoch": 0.328, "grad_norm": 0.679784159844079, "learning_rate": 2.6070287539936105e-05, "loss": 0.5781, "loss_nan_ranks": 0, "loss_rank_avg": 0.5432563424110413, "step": 205, "valid_targets_mean": 3482.2, "valid_targets_min": 474 }, { "epoch": 0.336, "grad_norm": 0.6536420335215318, "learning_rate": 2.670926517571885e-05, "loss": 0.5527, "loss_nan_ranks": 0, "loss_rank_avg": 0.5386021137237549, "step": 210, "valid_targets_mean": 4072.3, "valid_targets_min": 288 }, { "epoch": 0.344, "grad_norm": 0.7579600175695036, "learning_rate": 2.73482428115016e-05, "loss": 0.5917, "loss_nan_ranks": 0, "loss_rank_avg": 0.5911605358123779, "step": 215, "valid_targets_mean": 2998.2, "valid_targets_min": 506 }, { "epoch": 0.352, "grad_norm": 0.8790312000878335, "learning_rate": 2.7987220447284347e-05, "loss": 0.6193, "loss_nan_ranks": 0, "loss_rank_avg": 0.5623832941055298, "step": 220, "valid_targets_mean": 2856.6, "valid_targets_min": 660 }, { "epoch": 0.36, "grad_norm": 0.8411714839123007, "learning_rate": 2.8626198083067093e-05, "loss": 0.5871, "loss_nan_ranks": 0, "loss_rank_avg": 0.6712679862976074, "step": 225, "valid_targets_mean": 3072.1, "valid_targets_min": 810 }, { "epoch": 0.368, "grad_norm": 0.6480362190445711, "learning_rate": 2.9265175718849843e-05, "loss": 0.5827, "loss_nan_ranks": 0, "loss_rank_avg": 0.5256530046463013, "step": 230, "valid_targets_mean": 3665.3, "valid_targets_min": 536 }, { "epoch": 0.376, "grad_norm": 0.6347344055975906, "learning_rate": 2.9904153354632592e-05, "loss": 0.5871, "loss_nan_ranks": 0, "loss_rank_avg": 0.6053222417831421, "step": 235, "valid_targets_mean": 4444.1, "valid_targets_min": 850 }, { "epoch": 0.384, "grad_norm": 0.7522140681943507, "learning_rate": 3.054313099041534e-05, "loss": 0.5885, "loss_nan_ranks": 0, "loss_rank_avg": 0.6026010513305664, "step": 240, "valid_targets_mean": 3201.2, "valid_targets_min": 447 }, { "epoch": 0.392, "grad_norm": 0.59444256273076, "learning_rate": 3.1182108626198084e-05, "loss": 0.5656, "loss_nan_ranks": 0, "loss_rank_avg": 0.5105293989181519, "step": 245, "valid_targets_mean": 4720.0, "valid_targets_min": 511 }, { "epoch": 0.4, "grad_norm": 0.7082748798819095, "learning_rate": 3.1821086261980834e-05, "loss": 0.5715, "loss_nan_ranks": 0, "loss_rank_avg": 0.5332015752792358, "step": 250, "valid_targets_mean": 3776.2, "valid_targets_min": 552 }, { "epoch": 0.408, "grad_norm": 0.5935423684362258, "learning_rate": 3.246006389776358e-05, "loss": 0.5579, "loss_nan_ranks": 0, "loss_rank_avg": 0.5111772418022156, "step": 255, "valid_targets_mean": 4674.4, "valid_targets_min": 667 }, { "epoch": 0.416, "grad_norm": 0.8490750724860248, "learning_rate": 3.3099041533546326e-05, "loss": 0.5582, "loss_nan_ranks": 0, "loss_rank_avg": 0.5234127044677734, "step": 260, "valid_targets_mean": 4834.0, "valid_targets_min": 767 }, { "epoch": 0.424, "grad_norm": 0.6787686121432547, "learning_rate": 3.3738019169329076e-05, "loss": 0.5422, "loss_nan_ranks": 0, "loss_rank_avg": 0.5584520101547241, "step": 265, "valid_targets_mean": 3572.9, "valid_targets_min": 739 }, { "epoch": 0.432, "grad_norm": 0.7210865201308775, "learning_rate": 3.4376996805111825e-05, "loss": 0.5439, "loss_nan_ranks": 0, "loss_rank_avg": 0.5728445649147034, "step": 270, "valid_targets_mean": 3147.6, "valid_targets_min": 709 }, { "epoch": 0.44, "grad_norm": 0.9390958887116584, "learning_rate": 3.5015974440894575e-05, "loss": 0.572, "loss_nan_ranks": 0, "loss_rank_avg": 0.6330382823944092, "step": 275, "valid_targets_mean": 2163.2, "valid_targets_min": 771 }, { "epoch": 0.448, "grad_norm": 0.7172744444294396, "learning_rate": 3.565495207667732e-05, "loss": 0.5708, "loss_nan_ranks": 0, "loss_rank_avg": 0.547631561756134, "step": 280, "valid_targets_mean": 3426.3, "valid_targets_min": 492 }, { "epoch": 0.456, "grad_norm": 0.6587837222708766, "learning_rate": 3.629392971246007e-05, "loss": 0.5714, "loss_nan_ranks": 0, "loss_rank_avg": 0.5626546144485474, "step": 285, "valid_targets_mean": 4249.6, "valid_targets_min": 615 }, { "epoch": 0.464, "grad_norm": 0.5946489699255438, "learning_rate": 3.6932907348242816e-05, "loss": 0.5554, "loss_nan_ranks": 0, "loss_rank_avg": 0.6051210165023804, "step": 290, "valid_targets_mean": 4729.2, "valid_targets_min": 655 }, { "epoch": 0.472, "grad_norm": 0.7892760841457642, "learning_rate": 3.757188498402556e-05, "loss": 0.5811, "loss_nan_ranks": 0, "loss_rank_avg": 0.6125881671905518, "step": 295, "valid_targets_mean": 3489.5, "valid_targets_min": 607 }, { "epoch": 0.48, "grad_norm": 0.6623319928800957, "learning_rate": 3.821086261980831e-05, "loss": 0.5852, "loss_nan_ranks": 0, "loss_rank_avg": 0.5646267533302307, "step": 300, "valid_targets_mean": 3532.5, "valid_targets_min": 851 }, { "epoch": 0.488, "grad_norm": 0.6226375467862089, "learning_rate": 3.884984025559106e-05, "loss": 0.5714, "loss_nan_ranks": 0, "loss_rank_avg": 0.5953035354614258, "step": 305, "valid_targets_mean": 4777.0, "valid_targets_min": 785 }, { "epoch": 0.496, "grad_norm": 0.7584022572749837, "learning_rate": 3.94888178913738e-05, "loss": 0.5374, "loss_nan_ranks": 0, "loss_rank_avg": 0.5502133369445801, "step": 310, "valid_targets_mean": 3806.0, "valid_targets_min": 679 }, { "epoch": 0.504, "grad_norm": 0.7226522137453772, "learning_rate": 3.9999987518434296e-05, "loss": 0.5273, "loss_nan_ranks": 0, "loss_rank_avg": 0.5224813222885132, "step": 315, "valid_targets_mean": 3150.1, "valid_targets_min": 655 }, { "epoch": 0.512, "grad_norm": 0.6141445710145204, "learning_rate": 3.999955066527015e-05, "loss": 0.5718, "loss_nan_ranks": 0, "loss_rank_avg": 0.5544633269309998, "step": 320, "valid_targets_mean": 4412.2, "valid_targets_min": 535 }, { "epoch": 0.52, "grad_norm": 0.8461142526243931, "learning_rate": 3.999848974939926e-05, "loss": 0.5414, "loss_nan_ranks": 0, "loss_rank_avg": 0.5990142822265625, "step": 325, "valid_targets_mean": 2348.9, "valid_targets_min": 499 }, { "epoch": 0.528, "grad_norm": 0.8250606491282058, "learning_rate": 3.999680480392626e-05, "loss": 0.5356, "loss_nan_ranks": 0, "loss_rank_avg": 0.5679922103881836, "step": 330, "valid_targets_mean": 3415.2, "valid_targets_min": 629 }, { "epoch": 0.536, "grad_norm": 0.637073771711946, "learning_rate": 3.999449588142792e-05, "loss": 0.534, "loss_nan_ranks": 0, "loss_rank_avg": 0.4861408770084381, "step": 335, "valid_targets_mean": 3706.7, "valid_targets_min": 631 }, { "epoch": 0.544, "grad_norm": 0.8398228483120712, "learning_rate": 3.9991563053951476e-05, "loss": 0.5715, "loss_nan_ranks": 0, "loss_rank_avg": 0.646068811416626, "step": 340, "valid_targets_mean": 2273.1, "valid_targets_min": 433 }, { "epoch": 0.552, "grad_norm": 0.6121671335659467, "learning_rate": 3.99880064130124e-05, "loss": 0.5564, "loss_nan_ranks": 0, "loss_rank_avg": 0.5839049816131592, "step": 345, "valid_targets_mean": 4629.9, "valid_targets_min": 878 }, { "epoch": 0.56, "grad_norm": 0.7358980937968385, "learning_rate": 3.9983826069591535e-05, "loss": 0.5492, "loss_nan_ranks": 0, "loss_rank_avg": 0.5735797882080078, "step": 350, "valid_targets_mean": 3127.5, "valid_targets_min": 725 }, { "epoch": 0.568, "grad_norm": 0.6083717163603471, "learning_rate": 3.997902215413163e-05, "loss": 0.5249, "loss_nan_ranks": 0, "loss_rank_avg": 0.5112648010253906, "step": 355, "valid_targets_mean": 4718.0, "valid_targets_min": 507 }, { "epoch": 0.576, "grad_norm": 0.6072026819837963, "learning_rate": 3.997359481653327e-05, "loss": 0.5393, "loss_nan_ranks": 0, "loss_rank_avg": 0.5504653453826904, "step": 360, "valid_targets_mean": 4234.2, "valid_targets_min": 578 }, { "epoch": 0.584, "grad_norm": 0.5970402191237862, "learning_rate": 3.996754422615023e-05, "loss": 0.547, "loss_nan_ranks": 0, "loss_rank_avg": 0.5461069345474243, "step": 365, "valid_targets_mean": 4407.4, "valid_targets_min": 606 }, { "epoch": 0.592, "grad_norm": 0.7672984290731282, "learning_rate": 3.996087057178411e-05, "loss": 0.5539, "loss_nan_ranks": 0, "loss_rank_avg": 0.6059372425079346, "step": 370, "valid_targets_mean": 3555.2, "valid_targets_min": 599 }, { "epoch": 0.6, "grad_norm": 0.7939676092215848, "learning_rate": 3.995357406167856e-05, "loss": 0.5563, "loss_nan_ranks": 0, "loss_rank_avg": 0.5978292226791382, "step": 375, "valid_targets_mean": 2776.0, "valid_targets_min": 734 }, { "epoch": 0.608, "grad_norm": 0.6525335154383021, "learning_rate": 3.994565492351267e-05, "loss": 0.5352, "loss_nan_ranks": 0, "loss_rank_avg": 0.5540485382080078, "step": 380, "valid_targets_mean": 4123.6, "valid_targets_min": 583 }, { "epoch": 0.616, "grad_norm": 0.6989698695789527, "learning_rate": 3.993711340439394e-05, "loss": 0.5343, "loss_nan_ranks": 0, "loss_rank_avg": 0.6023855209350586, "step": 385, "valid_targets_mean": 3758.4, "valid_targets_min": 463 }, { "epoch": 0.624, "grad_norm": 0.7124652823560258, "learning_rate": 3.9927949770850535e-05, "loss": 0.5326, "loss_nan_ranks": 0, "loss_rank_avg": 0.5666008591651917, "step": 390, "valid_targets_mean": 3212.3, "valid_targets_min": 754 }, { "epoch": 0.632, "grad_norm": 0.6842076356533583, "learning_rate": 3.991816430882297e-05, "loss": 0.5375, "loss_nan_ranks": 0, "loss_rank_avg": 0.5813860297203064, "step": 395, "valid_targets_mean": 4066.8, "valid_targets_min": 730 }, { "epoch": 0.64, "grad_norm": 0.529144826449527, "learning_rate": 3.9907757323655206e-05, "loss": 0.5396, "loss_nan_ranks": 0, "loss_rank_avg": 0.555700957775116, "step": 400, "valid_targets_mean": 7514.6, "valid_targets_min": 874 }, { "epoch": 0.648, "grad_norm": 0.687588424871281, "learning_rate": 3.98967291400851e-05, "loss": 0.5169, "loss_nan_ranks": 0, "loss_rank_avg": 0.5300942659378052, "step": 405, "valid_targets_mean": 3809.2, "valid_targets_min": 695 }, { "epoch": 0.656, "grad_norm": 0.6988433600033485, "learning_rate": 3.98850801022343e-05, "loss": 0.5232, "loss_nan_ranks": 0, "loss_rank_avg": 0.44089508056640625, "step": 410, "valid_targets_mean": 4746.0, "valid_targets_min": 570 }, { "epoch": 0.664, "grad_norm": 0.6587643148830101, "learning_rate": 3.987281057359746e-05, "loss": 0.5335, "loss_nan_ranks": 0, "loss_rank_avg": 0.5711066126823425, "step": 415, "valid_targets_mean": 3932.2, "valid_targets_min": 507 }, { "epoch": 0.672, "grad_norm": 0.6308013522460244, "learning_rate": 3.985992093703096e-05, "loss": 0.5377, "loss_nan_ranks": 0, "loss_rank_avg": 0.5582001805305481, "step": 420, "valid_targets_mean": 3887.5, "valid_targets_min": 589 }, { "epoch": 0.68, "grad_norm": 0.5418196566696337, "learning_rate": 3.98464115947409e-05, "loss": 0.5323, "loss_nan_ranks": 0, "loss_rank_avg": 0.49433648586273193, "step": 425, "valid_targets_mean": 5441.3, "valid_targets_min": 720 }, { "epoch": 0.688, "grad_norm": 0.7175445202648967, "learning_rate": 3.9832282968270595e-05, "loss": 0.5287, "loss_nan_ranks": 0, "loss_rank_avg": 0.552021861076355, "step": 430, "valid_targets_mean": 2823.0, "valid_targets_min": 776 }, { "epoch": 0.696, "grad_norm": 0.6367170247884075, "learning_rate": 3.9817535498487385e-05, "loss": 0.5304, "loss_nan_ranks": 0, "loss_rank_avg": 0.5349035263061523, "step": 435, "valid_targets_mean": 4406.4, "valid_targets_min": 761 }, { "epoch": 0.704, "grad_norm": 0.6231922620233467, "learning_rate": 3.980216964556892e-05, "loss": 0.5176, "loss_nan_ranks": 0, "loss_rank_avg": 0.5187514424324036, "step": 440, "valid_targets_mean": 4343.2, "valid_targets_min": 724 }, { "epoch": 0.712, "grad_norm": 0.7123358352627772, "learning_rate": 3.978618588898873e-05, "loss": 0.5295, "loss_nan_ranks": 0, "loss_rank_avg": 0.5067673921585083, "step": 445, "valid_targets_mean": 3212.3, "valid_targets_min": 872 }, { "epoch": 0.72, "grad_norm": 0.7454664370057654, "learning_rate": 3.976958472750137e-05, "loss": 0.537, "loss_nan_ranks": 0, "loss_rank_avg": 0.5686776638031006, "step": 450, "valid_targets_mean": 3169.4, "valid_targets_min": 646 }, { "epoch": 0.728, "grad_norm": 0.5174561677012455, "learning_rate": 3.9752366679126754e-05, "loss": 0.528, "loss_nan_ranks": 0, "loss_rank_avg": 0.5228891968727112, "step": 455, "valid_targets_mean": 5854.7, "valid_targets_min": 495 }, { "epoch": 0.736, "grad_norm": 0.7061480122954578, "learning_rate": 3.973453228113405e-05, "loss": 0.5422, "loss_nan_ranks": 0, "loss_rank_avg": 0.5699916481971741, "step": 460, "valid_targets_mean": 3457.9, "valid_targets_min": 631 }, { "epoch": 0.744, "grad_norm": 0.6372505538914572, "learning_rate": 3.971608209002489e-05, "loss": 0.5036, "loss_nan_ranks": 0, "loss_rank_avg": 0.510094940662384, "step": 465, "valid_targets_mean": 3599.1, "valid_targets_min": 827 }, { "epoch": 0.752, "grad_norm": 0.6192730292845192, "learning_rate": 3.969701668151603e-05, "loss": 0.4938, "loss_nan_ranks": 0, "loss_rank_avg": 0.5506471395492554, "step": 470, "valid_targets_mean": 4186.9, "valid_targets_min": 605 }, { "epoch": 0.76, "grad_norm": 0.6458176448811681, "learning_rate": 3.9677336650521336e-05, "loss": 0.51, "loss_nan_ranks": 0, "loss_rank_avg": 0.4645317792892456, "step": 475, "valid_targets_mean": 5438.2, "valid_targets_min": 691 }, { "epoch": 0.768, "grad_norm": 0.6877541407077931, "learning_rate": 3.9657042611133294e-05, "loss": 0.5309, "loss_nan_ranks": 0, "loss_rank_avg": 0.5832065343856812, "step": 480, "valid_targets_mean": 4794.7, "valid_targets_min": 1011 }, { "epoch": 0.776, "grad_norm": 0.7261934590292777, "learning_rate": 3.963613519660379e-05, "loss": 0.5469, "loss_nan_ranks": 0, "loss_rank_avg": 0.5814934968948364, "step": 485, "valid_targets_mean": 3265.4, "valid_targets_min": 814 }, { "epoch": 0.784, "grad_norm": 0.5727199057578912, "learning_rate": 3.961461505932435e-05, "loss": 0.5226, "loss_nan_ranks": 0, "loss_rank_avg": 0.5271404385566711, "step": 490, "valid_targets_mean": 5650.6, "valid_targets_min": 795 }, { "epoch": 0.792, "grad_norm": 0.6931137062168018, "learning_rate": 3.959248287080583e-05, "loss": 0.4874, "loss_nan_ranks": 0, "loss_rank_avg": 0.5182836055755615, "step": 495, "valid_targets_mean": 2894.6, "valid_targets_min": 634 }, { "epoch": 0.8, "grad_norm": 0.5579037807480154, "learning_rate": 3.9569739321657416e-05, "loss": 0.5329, "loss_nan_ranks": 0, "loss_rank_avg": 0.4890630543231964, "step": 500, "valid_targets_mean": 4849.6, "valid_targets_min": 882 }, { "epoch": 0.808, "grad_norm": 0.5278555941049078, "learning_rate": 3.9546385121565095e-05, "loss": 0.5214, "loss_nan_ranks": 0, "loss_rank_avg": 0.4740561842918396, "step": 505, "valid_targets_mean": 5560.9, "valid_targets_min": 386 }, { "epoch": 0.816, "grad_norm": 0.7629284256030876, "learning_rate": 3.952242099926951e-05, "loss": 0.5019, "loss_nan_ranks": 0, "loss_rank_avg": 0.5348608493804932, "step": 510, "valid_targets_mean": 3005.8, "valid_targets_min": 485 }, { "epoch": 0.824, "grad_norm": 0.5984390769675876, "learning_rate": 3.9497847702543196e-05, "loss": 0.5011, "loss_nan_ranks": 0, "loss_rank_avg": 0.49162226915359497, "step": 515, "valid_targets_mean": 4003.1, "valid_targets_min": 864 }, { "epoch": 0.832, "grad_norm": 0.5425603828548777, "learning_rate": 3.94726659981673e-05, "loss": 0.5236, "loss_nan_ranks": 0, "loss_rank_avg": 0.4525638222694397, "step": 520, "valid_targets_mean": 4395.4, "valid_targets_min": 617 }, { "epoch": 0.84, "grad_norm": 0.527115667278833, "learning_rate": 3.94468766719076e-05, "loss": 0.5203, "loss_nan_ranks": 0, "loss_rank_avg": 0.5383976101875305, "step": 525, "valid_targets_mean": 6022.6, "valid_targets_min": 776 }, { "epoch": 0.848, "grad_norm": 0.48267274772473334, "learning_rate": 3.942048052849001e-05, "loss": 0.4667, "loss_nan_ranks": 0, "loss_rank_avg": 0.40104740858078003, "step": 530, "valid_targets_mean": 5891.2, "valid_targets_min": 675 }, { "epoch": 0.856, "grad_norm": 0.7791947549458919, "learning_rate": 3.939347839157548e-05, "loss": 0.5056, "loss_nan_ranks": 0, "loss_rank_avg": 0.5501073598861694, "step": 535, "valid_targets_mean": 2821.9, "valid_targets_min": 780 }, { "epoch": 0.864, "grad_norm": 0.7336120776006877, "learning_rate": 3.9365871103734264e-05, "loss": 0.5003, "loss_nan_ranks": 0, "loss_rank_avg": 0.5346959829330444, "step": 540, "valid_targets_mean": 3434.2, "valid_targets_min": 733 }, { "epoch": 0.872, "grad_norm": 0.5526825963889843, "learning_rate": 3.933765952641965e-05, "loss": 0.4971, "loss_nan_ranks": 0, "loss_rank_avg": 0.4672126770019531, "step": 545, "valid_targets_mean": 4462.4, "valid_targets_min": 904 }, { "epoch": 0.88, "grad_norm": 0.6189947196809612, "learning_rate": 3.930884453994109e-05, "loss": 0.4983, "loss_nan_ranks": 0, "loss_rank_avg": 0.4648451507091522, "step": 550, "valid_targets_mean": 3606.6, "valid_targets_min": 624 }, { "epoch": 0.888, "grad_norm": 0.6586170474235856, "learning_rate": 3.9279427043436706e-05, "loss": 0.5388, "loss_nan_ranks": 0, "loss_rank_avg": 0.5403224229812622, "step": 555, "valid_targets_mean": 3831.8, "valid_targets_min": 415 }, { "epoch": 0.896, "grad_norm": 0.5313121728145141, "learning_rate": 3.924940795484525e-05, "loss": 0.5063, "loss_nan_ranks": 0, "loss_rank_avg": 0.521693766117096, "step": 560, "valid_targets_mean": 5235.8, "valid_targets_min": 828 }, { "epoch": 0.904, "grad_norm": 0.6424391584952766, "learning_rate": 3.9218788210877436e-05, "loss": 0.5359, "loss_nan_ranks": 0, "loss_rank_avg": 0.5287529826164246, "step": 565, "valid_targets_mean": 3747.9, "valid_targets_min": 597 }, { "epoch": 0.912, "grad_norm": 0.7979713052864025, "learning_rate": 3.918756876698676e-05, "loss": 0.517, "loss_nan_ranks": 0, "loss_rank_avg": 0.5333600640296936, "step": 570, "valid_targets_mean": 4772.2, "valid_targets_min": 426 }, { "epoch": 0.92, "grad_norm": 0.5943189435660875, "learning_rate": 3.9155750597339634e-05, "loss": 0.4952, "loss_nan_ranks": 0, "loss_rank_avg": 0.5387169718742371, "step": 575, "valid_targets_mean": 4364.7, "valid_targets_min": 743 }, { "epoch": 0.928, "grad_norm": 0.6520342943163902, "learning_rate": 3.912333469478502e-05, "loss": 0.5197, "loss_nan_ranks": 0, "loss_rank_avg": 0.5179246068000793, "step": 580, "valid_targets_mean": 3263.0, "valid_targets_min": 735 }, { "epoch": 0.936, "grad_norm": 0.6103078060063878, "learning_rate": 3.909032207082344e-05, "loss": 0.4996, "loss_nan_ranks": 0, "loss_rank_avg": 0.49960577487945557, "step": 585, "valid_targets_mean": 4096.7, "valid_targets_min": 505 }, { "epoch": 0.944, "grad_norm": 0.585543703034473, "learning_rate": 3.90567137555754e-05, "loss": 0.5287, "loss_nan_ranks": 0, "loss_rank_avg": 0.5424899458885193, "step": 590, "valid_targets_mean": 4775.8, "valid_targets_min": 857 }, { "epoch": 0.952, "grad_norm": 0.6304193255405047, "learning_rate": 3.9022510797749286e-05, "loss": 0.5087, "loss_nan_ranks": 0, "loss_rank_avg": 0.5315080285072327, "step": 595, "valid_targets_mean": 4235.9, "valid_targets_min": 907 }, { "epoch": 0.96, "grad_norm": 0.585015028731908, "learning_rate": 3.898771426460859e-05, "loss": 0.4945, "loss_nan_ranks": 0, "loss_rank_avg": 0.4809357523918152, "step": 600, "valid_targets_mean": 3830.8, "valid_targets_min": 628 }, { "epoch": 0.968, "grad_norm": 0.68245860217036, "learning_rate": 3.8952325241938635e-05, "loss": 0.5126, "loss_nan_ranks": 0, "loss_rank_avg": 0.5318968296051025, "step": 605, "valid_targets_mean": 4136.1, "valid_targets_min": 635 }, { "epoch": 0.976, "grad_norm": 0.6148876956191819, "learning_rate": 3.8916344834012695e-05, "loss": 0.5311, "loss_nan_ranks": 0, "loss_rank_avg": 0.5257702469825745, "step": 610, "valid_targets_mean": 4727.1, "valid_targets_min": 644 }, { "epoch": 0.984, "grad_norm": 0.6143278822542023, "learning_rate": 3.887977416355754e-05, "loss": 0.5224, "loss_nan_ranks": 0, "loss_rank_avg": 0.4943949580192566, "step": 615, "valid_targets_mean": 3667.9, "valid_targets_min": 775 }, { "epoch": 0.992, "grad_norm": 0.639756859129058, "learning_rate": 3.884261437171838e-05, "loss": 0.5033, "loss_nan_ranks": 0, "loss_rank_avg": 0.5292478799819946, "step": 620, "valid_targets_mean": 3723.4, "valid_targets_min": 588 }, { "epoch": 1.0, "grad_norm": 0.6034626688615775, "learning_rate": 3.8804866618023284e-05, "loss": 0.4778, "loss_nan_ranks": 0, "loss_rank_avg": 0.5043004751205444, "step": 625, "valid_targets_mean": 4286.8, "valid_targets_min": 593 }, { "epoch": 1.008, "grad_norm": 0.5749723492360072, "learning_rate": 3.876653208034698e-05, "loss": 0.5212, "loss_nan_ranks": 0, "loss_rank_avg": 0.4886097311973572, "step": 630, "valid_targets_mean": 4484.8, "valid_targets_min": 710 }, { "epoch": 1.016, "grad_norm": 0.5975830354439139, "learning_rate": 3.8727611954874114e-05, "loss": 0.4941, "loss_nan_ranks": 0, "loss_rank_avg": 0.4872697591781616, "step": 635, "valid_targets_mean": 4030.0, "valid_targets_min": 701 }, { "epoch": 1.024, "grad_norm": 0.6626052135788788, "learning_rate": 3.8688107456061904e-05, "loss": 0.4666, "loss_nan_ranks": 0, "loss_rank_avg": 0.48330509662628174, "step": 640, "valid_targets_mean": 3877.7, "valid_targets_min": 751 }, { "epoch": 1.032, "grad_norm": 0.5825477612458702, "learning_rate": 3.864801981660227e-05, "loss": 0.4876, "loss_nan_ranks": 0, "loss_rank_avg": 0.50968998670578, "step": 645, "valid_targets_mean": 4582.6, "valid_targets_min": 678 }, { "epoch": 1.04, "grad_norm": 0.5888038935122548, "learning_rate": 3.860735028738337e-05, "loss": 0.4909, "loss_nan_ranks": 0, "loss_rank_avg": 0.4983733296394348, "step": 650, "valid_targets_mean": 5661.3, "valid_targets_min": 946 }, { "epoch": 1.048, "grad_norm": 0.7028901777525807, "learning_rate": 3.856610013745051e-05, "loss": 0.4675, "loss_nan_ranks": 0, "loss_rank_avg": 0.43987807631492615, "step": 655, "valid_targets_mean": 2972.2, "valid_targets_min": 540 }, { "epoch": 1.056, "grad_norm": 0.6837565819150657, "learning_rate": 3.852427065396665e-05, "loss": 0.5386, "loss_nan_ranks": 0, "loss_rank_avg": 0.54156893491745, "step": 660, "valid_targets_mean": 3283.2, "valid_targets_min": 984 }, { "epoch": 1.064, "grad_norm": 0.7103924713439609, "learning_rate": 3.848186314217213e-05, "loss": 0.4915, "loss_nan_ranks": 0, "loss_rank_avg": 0.5271738767623901, "step": 665, "valid_targets_mean": 3783.6, "valid_targets_min": 591 }, { "epoch": 1.072, "grad_norm": 0.5777473820322118, "learning_rate": 3.843887892534402e-05, "loss": 0.4949, "loss_nan_ranks": 0, "loss_rank_avg": 0.4844241142272949, "step": 670, "valid_targets_mean": 5510.7, "valid_targets_min": 801 }, { "epoch": 1.08, "grad_norm": 0.5592068039838433, "learning_rate": 3.8395319344754776e-05, "loss": 0.4824, "loss_nan_ranks": 0, "loss_rank_avg": 0.4355768859386444, "step": 675, "valid_targets_mean": 4872.8, "valid_targets_min": 775 }, { "epoch": 1.088, "grad_norm": 0.5196780282819776, "learning_rate": 3.8351185759630435e-05, "loss": 0.5028, "loss_nan_ranks": 0, "loss_rank_avg": 0.4816337525844574, "step": 680, "valid_targets_mean": 6182.8, "valid_targets_min": 689 }, { "epoch": 1.096, "grad_norm": 0.4974070083115012, "learning_rate": 3.830647954710816e-05, "loss": 0.4799, "loss_nan_ranks": 0, "loss_rank_avg": 0.4324873685836792, "step": 685, "valid_targets_mean": 5340.4, "valid_targets_min": 622 }, { "epoch": 1.104, "grad_norm": 0.5227557174907695, "learning_rate": 3.826120210219331e-05, "loss": 0.4665, "loss_nan_ranks": 0, "loss_rank_avg": 0.4405563771724701, "step": 690, "valid_targets_mean": 5245.5, "valid_targets_min": 822 }, { "epoch": 1.112, "grad_norm": 0.5415434424451512, "learning_rate": 3.8215354837715836e-05, "loss": 0.4753, "loss_nan_ranks": 0, "loss_rank_avg": 0.443329781293869, "step": 695, "valid_targets_mean": 6236.1, "valid_targets_min": 756 }, { "epoch": 1.12, "grad_norm": 0.6938512610507073, "learning_rate": 3.816893918428631e-05, "loss": 0.5224, "loss_nan_ranks": 0, "loss_rank_avg": 0.5610541701316833, "step": 700, "valid_targets_mean": 3587.6, "valid_targets_min": 975 }, { "epoch": 1.1280000000000001, "grad_norm": 0.7257198661742723, "learning_rate": 3.8121956590251153e-05, "loss": 0.4849, "loss_nan_ranks": 0, "loss_rank_avg": 0.511725902557373, "step": 705, "valid_targets_mean": 2714.5, "valid_targets_min": 704 }, { "epoch": 1.1360000000000001, "grad_norm": 0.7628570201199647, "learning_rate": 3.8074408521647576e-05, "loss": 0.4942, "loss_nan_ranks": 0, "loss_rank_avg": 0.5041239857673645, "step": 710, "valid_targets_mean": 4192.9, "valid_targets_min": 688 }, { "epoch": 1.144, "grad_norm": 0.7232575314594617, "learning_rate": 3.802629646215771e-05, "loss": 0.5357, "loss_nan_ranks": 0, "loss_rank_avg": 0.5536401271820068, "step": 715, "valid_targets_mean": 3258.6, "valid_targets_min": 492 }, { "epoch": 1.152, "grad_norm": 0.5322461206523738, "learning_rate": 3.79776219130624e-05, "loss": 0.5255, "loss_nan_ranks": 0, "loss_rank_avg": 0.4725511074066162, "step": 720, "valid_targets_mean": 5086.8, "valid_targets_min": 754 }, { "epoch": 1.16, "grad_norm": 0.5597590750275225, "learning_rate": 3.792838639319431e-05, "loss": 0.4851, "loss_nan_ranks": 0, "loss_rank_avg": 0.4070642590522766, "step": 725, "valid_targets_mean": 3765.9, "valid_targets_min": 598 }, { "epoch": 1.168, "grad_norm": 0.5995633156774554, "learning_rate": 3.787859143889054e-05, "loss": 0.4643, "loss_nan_ranks": 0, "loss_rank_avg": 0.5027796030044556, "step": 730, "valid_targets_mean": 4211.0, "valid_targets_min": 689 }, { "epoch": 1.176, "grad_norm": 0.7391042973959496, "learning_rate": 3.782823860394469e-05, "loss": 0.535, "loss_nan_ranks": 0, "loss_rank_avg": 0.478831022977829, "step": 735, "valid_targets_mean": 3402.7, "valid_targets_min": 428 }, { "epoch": 1.184, "grad_norm": 0.655094019532376, "learning_rate": 3.777732945955841e-05, "loss": 0.4662, "loss_nan_ranks": 0, "loss_rank_avg": 0.49596506357192993, "step": 740, "valid_targets_mean": 4551.3, "valid_targets_min": 786 }, { "epoch": 1.192, "grad_norm": 0.6319621366301474, "learning_rate": 3.772586559429229e-05, "loss": 0.4796, "loss_nan_ranks": 0, "loss_rank_avg": 0.5001382231712341, "step": 745, "valid_targets_mean": 3626.9, "valid_targets_min": 925 }, { "epoch": 1.2, "grad_norm": 0.5834982530634643, "learning_rate": 3.767384861401636e-05, "loss": 0.4846, "loss_nan_ranks": 0, "loss_rank_avg": 0.4496903419494629, "step": 750, "valid_targets_mean": 3643.1, "valid_targets_min": 861 }, { "epoch": 1.208, "grad_norm": 0.7608316000603859, "learning_rate": 3.762128014185998e-05, "loss": 0.5195, "loss_nan_ranks": 0, "loss_rank_avg": 0.5569829344749451, "step": 755, "valid_targets_mean": 2473.0, "valid_targets_min": 473 }, { "epoch": 1.216, "grad_norm": 0.5591412298342779, "learning_rate": 3.7568161818161135e-05, "loss": 0.4618, "loss_nan_ranks": 0, "loss_rank_avg": 0.49754035472869873, "step": 760, "valid_targets_mean": 6402.6, "valid_targets_min": 1106 }, { "epoch": 1.224, "grad_norm": 0.8123260766469027, "learning_rate": 3.751449530041532e-05, "loss": 0.4975, "loss_nan_ranks": 0, "loss_rank_avg": 0.5815908908843994, "step": 765, "valid_targets_mean": 2641.3, "valid_targets_min": 795 }, { "epoch": 1.232, "grad_norm": 0.6101133475673763, "learning_rate": 3.7460282263223764e-05, "loss": 0.4819, "loss_nan_ranks": 0, "loss_rank_avg": 0.4854985475540161, "step": 770, "valid_targets_mean": 4249.1, "valid_targets_min": 590 }, { "epoch": 1.24, "grad_norm": 0.5375018128815826, "learning_rate": 3.740552439824122e-05, "loss": 0.486, "loss_nan_ranks": 0, "loss_rank_avg": 0.4772304594516754, "step": 775, "valid_targets_mean": 4520.9, "valid_targets_min": 801 }, { "epoch": 1.248, "grad_norm": 0.51139105848858, "learning_rate": 3.735022341412314e-05, "loss": 0.477, "loss_nan_ranks": 0, "loss_rank_avg": 0.47216999530792236, "step": 780, "valid_targets_mean": 6126.1, "valid_targets_min": 738 }, { "epoch": 1.256, "grad_norm": 0.5195276484071466, "learning_rate": 3.7294381036472386e-05, "loss": 0.4797, "loss_nan_ranks": 0, "loss_rank_avg": 0.46269387006759644, "step": 785, "valid_targets_mean": 4958.2, "valid_targets_min": 667 }, { "epoch": 1.264, "grad_norm": 0.6267531451482399, "learning_rate": 3.723799900778538e-05, "loss": 0.4883, "loss_nan_ranks": 0, "loss_rank_avg": 0.4804888665676117, "step": 790, "valid_targets_mean": 3517.1, "valid_targets_min": 578 }, { "epoch": 1.272, "grad_norm": 0.5195057436049215, "learning_rate": 3.7181079087397705e-05, "loss": 0.5087, "loss_nan_ranks": 0, "loss_rank_avg": 0.5404967665672302, "step": 795, "valid_targets_mean": 6603.4, "valid_targets_min": 989 }, { "epoch": 1.28, "grad_norm": 0.5398694980743434, "learning_rate": 3.712362305142926e-05, "loss": 0.4393, "loss_nan_ranks": 0, "loss_rank_avg": 0.4667036831378937, "step": 800, "valid_targets_mean": 4789.3, "valid_targets_min": 512 }, { "epoch": 1.288, "grad_norm": 0.700472880561129, "learning_rate": 3.706563269272878e-05, "loss": 0.4852, "loss_nan_ranks": 0, "loss_rank_avg": 0.45391908288002014, "step": 805, "valid_targets_mean": 2936.9, "valid_targets_min": 646 }, { "epoch": 1.296, "grad_norm": 0.6212205565818121, "learning_rate": 3.700710982081794e-05, "loss": 0.4758, "loss_nan_ranks": 0, "loss_rank_avg": 0.4718417823314667, "step": 810, "valid_targets_mean": 3432.1, "valid_targets_min": 782 }, { "epoch": 1.304, "grad_norm": 0.6408053164272295, "learning_rate": 3.694805626183486e-05, "loss": 0.4872, "loss_nan_ranks": 0, "loss_rank_avg": 0.5090664625167847, "step": 815, "valid_targets_mean": 4112.0, "valid_targets_min": 860 }, { "epoch": 1.312, "grad_norm": 0.7001179282352733, "learning_rate": 3.688847385847711e-05, "loss": 0.4719, "loss_nan_ranks": 0, "loss_rank_avg": 0.427803099155426, "step": 820, "valid_targets_mean": 3749.1, "valid_targets_min": 1009 }, { "epoch": 1.32, "grad_norm": 0.6511396074795565, "learning_rate": 3.682836446994428e-05, "loss": 0.5126, "loss_nan_ranks": 0, "loss_rank_avg": 0.4967302083969116, "step": 825, "valid_targets_mean": 3997.8, "valid_targets_min": 718 }, { "epoch": 1.328, "grad_norm": 0.593371544756325, "learning_rate": 3.676772997187989e-05, "loss": 0.4859, "loss_nan_ranks": 0, "loss_rank_avg": 0.475099116563797, "step": 830, "valid_targets_mean": 4375.9, "valid_targets_min": 719 }, { "epoch": 1.336, "grad_norm": 0.6550709845667457, "learning_rate": 3.670657225631289e-05, "loss": 0.4717, "loss_nan_ranks": 0, "loss_rank_avg": 0.5210568904876709, "step": 835, "valid_targets_mean": 3314.4, "valid_targets_min": 662 }, { "epoch": 1.3439999999999999, "grad_norm": 0.6168115512142734, "learning_rate": 3.6644893231598635e-05, "loss": 0.4695, "loss_nan_ranks": 0, "loss_rank_avg": 0.5161671042442322, "step": 840, "valid_targets_mean": 3743.0, "valid_targets_min": 552 }, { "epoch": 1.3519999999999999, "grad_norm": 1.2636079758657017, "learning_rate": 3.658269482235932e-05, "loss": 0.4564, "loss_nan_ranks": 0, "loss_rank_avg": 0.433046817779541, "step": 845, "valid_targets_mean": 2702.0, "valid_targets_min": 640 }, { "epoch": 1.3599999999999999, "grad_norm": 0.644979165166496, "learning_rate": 3.651997896942394e-05, "loss": 0.525, "loss_nan_ranks": 0, "loss_rank_avg": 0.5414267778396606, "step": 850, "valid_targets_mean": 4083.8, "valid_targets_min": 815 }, { "epoch": 1.3679999999999999, "grad_norm": 0.5287402349737144, "learning_rate": 3.645674762976769e-05, "loss": 0.4819, "loss_nan_ranks": 0, "loss_rank_avg": 0.43094199895858765, "step": 855, "valid_targets_mean": 4626.4, "valid_targets_min": 583 }, { "epoch": 1.376, "grad_norm": 0.609279043498901, "learning_rate": 3.639300277645096e-05, "loss": 0.475, "loss_nan_ranks": 0, "loss_rank_avg": 0.49126923084259033, "step": 860, "valid_targets_mean": 4487.2, "valid_targets_min": 900 }, { "epoch": 1.384, "grad_norm": 0.645279015628696, "learning_rate": 3.6328746398557715e-05, "loss": 0.4776, "loss_nan_ranks": 0, "loss_rank_avg": 0.500410795211792, "step": 865, "valid_targets_mean": 3312.8, "valid_targets_min": 447 }, { "epoch": 1.392, "grad_norm": 0.6099569963138785, "learning_rate": 3.6263980501133466e-05, "loss": 0.4815, "loss_nan_ranks": 0, "loss_rank_avg": 0.4480360150337219, "step": 870, "valid_targets_mean": 4288.2, "valid_targets_min": 658 }, { "epoch": 1.4, "grad_norm": 0.5594438345209423, "learning_rate": 3.619870710512268e-05, "loss": 0.4831, "loss_nan_ranks": 0, "loss_rank_avg": 0.46603310108184814, "step": 875, "valid_targets_mean": 4127.5, "valid_targets_min": 810 }, { "epoch": 1.408, "grad_norm": 0.7040863724335125, "learning_rate": 3.6132928247305713e-05, "loss": 0.4717, "loss_nan_ranks": 0, "loss_rank_avg": 0.4972262978553772, "step": 880, "valid_targets_mean": 4002.6, "valid_targets_min": 811 }, { "epoch": 1.416, "grad_norm": 0.4723530474869513, "learning_rate": 3.60666459802353e-05, "loss": 0.4683, "loss_nan_ranks": 0, "loss_rank_avg": 0.45891261100769043, "step": 885, "valid_targets_mean": 6404.6, "valid_targets_min": 860 }, { "epoch": 1.424, "grad_norm": 0.6504389615505566, "learning_rate": 3.599986237217245e-05, "loss": 0.4589, "loss_nan_ranks": 0, "loss_rank_avg": 0.49261537194252014, "step": 890, "valid_targets_mean": 3836.1, "valid_targets_min": 597 }, { "epoch": 1.432, "grad_norm": 0.5829046317294024, "learning_rate": 3.593257950702194e-05, "loss": 0.4371, "loss_nan_ranks": 0, "loss_rank_avg": 0.41449180245399475, "step": 895, "valid_targets_mean": 4351.4, "valid_targets_min": 373 }, { "epoch": 1.44, "grad_norm": 0.6928805501763816, "learning_rate": 3.586479948426728e-05, "loss": 0.483, "loss_nan_ranks": 0, "loss_rank_avg": 0.44609037041664124, "step": 900, "valid_targets_mean": 4319.5, "valid_targets_min": 763 }, { "epoch": 1.448, "grad_norm": 0.6643739326321101, "learning_rate": 3.579652441890523e-05, "loss": 0.4797, "loss_nan_ranks": 0, "loss_rank_avg": 0.49757933616638184, "step": 905, "valid_targets_mean": 3803.0, "valid_targets_min": 657 }, { "epoch": 1.456, "grad_norm": 0.6143346831296209, "learning_rate": 3.572775644137974e-05, "loss": 0.4814, "loss_nan_ranks": 0, "loss_rank_avg": 0.4909932315349579, "step": 910, "valid_targets_mean": 3776.1, "valid_targets_min": 765 }, { "epoch": 1.464, "grad_norm": 0.529180684369256, "learning_rate": 3.5658497697515534e-05, "loss": 0.4566, "loss_nan_ranks": 0, "loss_rank_avg": 0.5000677704811096, "step": 915, "valid_targets_mean": 6069.6, "valid_targets_min": 812 }, { "epoch": 1.472, "grad_norm": 0.6497553129314787, "learning_rate": 3.558875034845113e-05, "loss": 0.4521, "loss_nan_ranks": 0, "loss_rank_avg": 0.4506397545337677, "step": 920, "valid_targets_mean": 3804.3, "valid_targets_min": 560 }, { "epoch": 1.48, "grad_norm": 0.4920648541618837, "learning_rate": 3.551851657057139e-05, "loss": 0.4807, "loss_nan_ranks": 0, "loss_rank_avg": 0.4525104761123657, "step": 925, "valid_targets_mean": 7107.8, "valid_targets_min": 635 }, { "epoch": 1.488, "grad_norm": 0.7772212863915389, "learning_rate": 3.544779855543963e-05, "loss": 0.4908, "loss_nan_ranks": 0, "loss_rank_avg": 0.4923700988292694, "step": 930, "valid_targets_mean": 2380.3, "valid_targets_min": 624 }, { "epoch": 1.496, "grad_norm": 0.5981404469738086, "learning_rate": 3.5376598509729226e-05, "loss": 0.4821, "loss_nan_ranks": 0, "loss_rank_avg": 0.4714764952659607, "step": 935, "valid_targets_mean": 4965.3, "valid_targets_min": 563 }, { "epoch": 1.504, "grad_norm": 0.7007792353553981, "learning_rate": 3.5304918655154754e-05, "loss": 0.4921, "loss_nan_ranks": 0, "loss_rank_avg": 0.4638800024986267, "step": 940, "valid_targets_mean": 3031.9, "valid_targets_min": 817 }, { "epoch": 1.512, "grad_norm": 0.6097210410936542, "learning_rate": 3.523276122840266e-05, "loss": 0.4893, "loss_nan_ranks": 0, "loss_rank_avg": 0.483889102935791, "step": 945, "valid_targets_mean": 3922.5, "valid_targets_min": 767 }, { "epoch": 1.52, "grad_norm": 0.7526194303601134, "learning_rate": 3.516012848106149e-05, "loss": 0.4721, "loss_nan_ranks": 0, "loss_rank_avg": 0.5017504692077637, "step": 950, "valid_targets_mean": 2572.9, "valid_targets_min": 755 }, { "epoch": 1.528, "grad_norm": 0.46052003404390657, "learning_rate": 3.5087022679551614e-05, "loss": 0.4661, "loss_nan_ranks": 0, "loss_rank_avg": 0.41806378960609436, "step": 955, "valid_targets_mean": 5992.2, "valid_targets_min": 751 }, { "epoch": 1.536, "grad_norm": 0.6989167426008138, "learning_rate": 3.5013446105054486e-05, "loss": 0.4931, "loss_nan_ranks": 0, "loss_rank_avg": 0.5233873128890991, "step": 960, "valid_targets_mean": 2915.4, "valid_targets_min": 935 }, { "epoch": 1.544, "grad_norm": 0.7730262333694343, "learning_rate": 3.493940105344152e-05, "loss": 0.4765, "loss_nan_ranks": 0, "loss_rank_avg": 0.45950740575790405, "step": 965, "valid_targets_mean": 3003.8, "valid_targets_min": 601 }, { "epoch": 1.552, "grad_norm": 0.6796211235694684, "learning_rate": 3.4864889835202366e-05, "loss": 0.4963, "loss_nan_ranks": 0, "loss_rank_avg": 0.4867255389690399, "step": 970, "valid_targets_mean": 5517.5, "valid_targets_min": 1042 }, { "epoch": 1.56, "grad_norm": 0.882468209296317, "learning_rate": 3.4789914775372905e-05, "loss": 0.4956, "loss_nan_ranks": 0, "loss_rank_avg": 0.5269094705581665, "step": 975, "valid_targets_mean": 2169.9, "valid_targets_min": 816 }, { "epoch": 1.568, "grad_norm": 0.6427886583581234, "learning_rate": 3.471447821346264e-05, "loss": 0.4946, "loss_nan_ranks": 0, "loss_rank_avg": 0.5064438581466675, "step": 980, "valid_targets_mean": 3668.9, "valid_targets_min": 802 }, { "epoch": 1.576, "grad_norm": 0.4751807016740937, "learning_rate": 3.463858250338168e-05, "loss": 0.478, "loss_nan_ranks": 0, "loss_rank_avg": 0.4542385935783386, "step": 985, "valid_targets_mean": 7338.5, "valid_targets_min": 371 }, { "epoch": 1.584, "grad_norm": 0.5520255318021472, "learning_rate": 3.4562230013367374e-05, "loss": 0.467, "loss_nan_ranks": 0, "loss_rank_avg": 0.47316858172416687, "step": 990, "valid_targets_mean": 4990.0, "valid_targets_min": 786 }, { "epoch": 1.592, "grad_norm": 0.5538940893817148, "learning_rate": 3.448542312591032e-05, "loss": 0.4678, "loss_nan_ranks": 0, "loss_rank_avg": 0.43099331855773926, "step": 995, "valid_targets_mean": 4630.3, "valid_targets_min": 766 }, { "epoch": 1.6, "grad_norm": 0.49936258088092805, "learning_rate": 3.440816423768007e-05, "loss": 0.4788, "loss_nan_ranks": 0, "loss_rank_avg": 0.4556550085544586, "step": 1000, "valid_targets_mean": 5382.2, "valid_targets_min": 557 }, { "epoch": 1.608, "grad_norm": 0.6836362049629878, "learning_rate": 3.433045575945031e-05, "loss": 0.4665, "loss_nan_ranks": 0, "loss_rank_avg": 0.5310341119766235, "step": 1005, "valid_targets_mean": 2997.4, "valid_targets_min": 705 }, { "epoch": 1.616, "grad_norm": 0.5410029620184749, "learning_rate": 3.42523001160237e-05, "loss": 0.4847, "loss_nan_ranks": 0, "loss_rank_avg": 0.47429001331329346, "step": 1010, "valid_targets_mean": 4692.7, "valid_targets_min": 418 }, { "epoch": 1.624, "grad_norm": 0.536446225642749, "learning_rate": 3.417369974615615e-05, "loss": 0.4739, "loss_nan_ranks": 0, "loss_rank_avg": 0.44192248582839966, "step": 1015, "valid_targets_mean": 4533.6, "valid_targets_min": 748 }, { "epoch": 1.6320000000000001, "grad_norm": 0.654586453788874, "learning_rate": 3.409465710248074e-05, "loss": 0.4511, "loss_nan_ranks": 0, "loss_rank_avg": 0.4620225727558136, "step": 1020, "valid_targets_mean": 3155.8, "valid_targets_min": 497 }, { "epoch": 1.6400000000000001, "grad_norm": 0.6885658704413448, "learning_rate": 3.401517465143119e-05, "loss": 0.4594, "loss_nan_ranks": 0, "loss_rank_avg": 0.47382691502571106, "step": 1025, "valid_targets_mean": 2903.8, "valid_targets_min": 511 }, { "epoch": 1.6480000000000001, "grad_norm": 0.5838313777450732, "learning_rate": 3.393525487316489e-05, "loss": 0.4802, "loss_nan_ranks": 0, "loss_rank_avg": 0.4386270344257355, "step": 1030, "valid_targets_mean": 3802.1, "valid_targets_min": 917 }, { "epoch": 1.6560000000000001, "grad_norm": 0.5475012361833616, "learning_rate": 3.385490026148554e-05, "loss": 0.4882, "loss_nan_ranks": 0, "loss_rank_avg": 0.47070324420928955, "step": 1035, "valid_targets_mean": 4426.2, "valid_targets_min": 556 }, { "epoch": 1.6640000000000001, "grad_norm": 0.5825446614405908, "learning_rate": 3.377411332376529e-05, "loss": 0.4871, "loss_nan_ranks": 0, "loss_rank_avg": 0.49477773904800415, "step": 1040, "valid_targets_mean": 3675.8, "valid_targets_min": 693 }, { "epoch": 1.6720000000000002, "grad_norm": 0.5522875927498029, "learning_rate": 3.369289658086651e-05, "loss": 0.4774, "loss_nan_ranks": 0, "loss_rank_avg": 0.4486830234527588, "step": 1045, "valid_targets_mean": 4307.8, "valid_targets_min": 513 }, { "epoch": 1.6800000000000002, "grad_norm": 0.5439529345170723, "learning_rate": 3.3611252567063184e-05, "loss": 0.4639, "loss_nan_ranks": 0, "loss_rank_avg": 0.4867696166038513, "step": 1050, "valid_targets_mean": 4714.7, "valid_targets_min": 860 }, { "epoch": 1.688, "grad_norm": 0.753150960893063, "learning_rate": 3.352918382996174e-05, "loss": 0.4951, "loss_nan_ranks": 0, "loss_rank_avg": 0.5364462733268738, "step": 1055, "valid_targets_mean": 3034.1, "valid_targets_min": 622 }, { "epoch": 1.696, "grad_norm": 0.5840969444722356, "learning_rate": 3.344669293042163e-05, "loss": 0.4687, "loss_nan_ranks": 0, "loss_rank_avg": 0.5160173177719116, "step": 1060, "valid_targets_mean": 4426.8, "valid_targets_min": 785 }, { "epoch": 1.704, "grad_norm": 0.6291215602430366, "learning_rate": 3.336378244247539e-05, "loss": 0.4774, "loss_nan_ranks": 0, "loss_rank_avg": 0.48828327655792236, "step": 1065, "valid_targets_mean": 3159.0, "valid_targets_min": 579 }, { "epoch": 1.712, "grad_norm": 0.7157439156221113, "learning_rate": 3.3280454953248326e-05, "loss": 0.4719, "loss_nan_ranks": 0, "loss_rank_avg": 0.46565982699394226, "step": 1070, "valid_targets_mean": 2837.4, "valid_targets_min": 479 }, { "epoch": 1.72, "grad_norm": 0.8834848909249748, "learning_rate": 3.3196713062877765e-05, "loss": 0.4555, "loss_nan_ranks": 0, "loss_rank_avg": 0.47797664999961853, "step": 1075, "valid_targets_mean": 2625.6, "valid_targets_min": 588 }, { "epoch": 1.728, "grad_norm": 0.47069533464521623, "learning_rate": 3.311255938443196e-05, "loss": 0.467, "loss_nan_ranks": 0, "loss_rank_avg": 0.4377485513687134, "step": 1080, "valid_targets_mean": 5656.1, "valid_targets_min": 735 }, { "epoch": 1.736, "grad_norm": 0.5994529019656026, "learning_rate": 3.3027996543828524e-05, "loss": 0.4708, "loss_nan_ranks": 0, "loss_rank_avg": 0.5046743154525757, "step": 1085, "valid_targets_mean": 3614.6, "valid_targets_min": 619 }, { "epoch": 1.744, "grad_norm": 0.6262011025182842, "learning_rate": 3.2943027179752494e-05, "loss": 0.4717, "loss_nan_ranks": 0, "loss_rank_avg": 0.4625643789768219, "step": 1090, "valid_targets_mean": 4390.8, "valid_targets_min": 701 }, { "epoch": 1.752, "grad_norm": 0.505027541492022, "learning_rate": 3.285765394357401e-05, "loss": 0.4815, "loss_nan_ranks": 0, "loss_rank_avg": 0.46330204606056213, "step": 1095, "valid_targets_mean": 5306.1, "valid_targets_min": 661 }, { "epoch": 1.76, "grad_norm": 0.514752245979312, "learning_rate": 3.277187949926556e-05, "loss": 0.4531, "loss_nan_ranks": 0, "loss_rank_avg": 0.41437840461730957, "step": 1100, "valid_targets_mean": 6349.6, "valid_targets_min": 759 }, { "epoch": 1.768, "grad_norm": 0.5757785700580377, "learning_rate": 3.268570652331888e-05, "loss": 0.4744, "loss_nan_ranks": 0, "loss_rank_avg": 0.43744391202926636, "step": 1105, "valid_targets_mean": 3915.2, "valid_targets_min": 686 }, { "epoch": 1.776, "grad_norm": 0.5909523847823849, "learning_rate": 3.2599137704661405e-05, "loss": 0.4395, "loss_nan_ranks": 0, "loss_rank_avg": 0.46480992436408997, "step": 1110, "valid_targets_mean": 3621.0, "valid_targets_min": 468 }, { "epoch": 1.784, "grad_norm": 0.6522784519010079, "learning_rate": 3.251217574457239e-05, "loss": 0.4716, "loss_nan_ranks": 0, "loss_rank_avg": 0.4484942555427551, "step": 1115, "valid_targets_mean": 3802.8, "valid_targets_min": 686 }, { "epoch": 1.792, "grad_norm": 0.6076639765753343, "learning_rate": 3.242482335659861e-05, "loss": 0.4756, "loss_nan_ranks": 0, "loss_rank_avg": 0.41383862495422363, "step": 1120, "valid_targets_mean": 3632.5, "valid_targets_min": 737 }, { "epoch": 1.8, "grad_norm": 1.4930741994185592, "learning_rate": 3.2337083266469687e-05, "loss": 0.4672, "loss_nan_ranks": 0, "loss_rank_avg": 0.49689725041389465, "step": 1125, "valid_targets_mean": 4094.2, "valid_targets_min": 918 }, { "epoch": 1.808, "grad_norm": 0.5983765842540781, "learning_rate": 3.224895821201304e-05, "loss": 0.4772, "loss_nan_ranks": 0, "loss_rank_avg": 0.43095701932907104, "step": 1130, "valid_targets_mean": 3158.9, "valid_targets_min": 505 }, { "epoch": 1.8159999999999998, "grad_norm": 0.7377527167711196, "learning_rate": 3.2160450943068446e-05, "loss": 0.494, "loss_nan_ranks": 0, "loss_rank_avg": 0.4552053213119507, "step": 1135, "valid_targets_mean": 3169.7, "valid_targets_min": 615 }, { "epoch": 1.8239999999999998, "grad_norm": 0.48209247220176665, "learning_rate": 3.207156422140225e-05, "loss": 0.4665, "loss_nan_ranks": 0, "loss_rank_avg": 0.40306127071380615, "step": 1140, "valid_targets_mean": 5754.3, "valid_targets_min": 633 }, { "epoch": 1.8319999999999999, "grad_norm": 0.6434012245459063, "learning_rate": 3.198230082062115e-05, "loss": 0.4792, "loss_nan_ranks": 0, "loss_rank_avg": 0.4668983817100525, "step": 1145, "valid_targets_mean": 3197.2, "valid_targets_min": 538 }, { "epoch": 1.8399999999999999, "grad_norm": 0.6995497102738215, "learning_rate": 3.189266352608574e-05, "loss": 0.4839, "loss_nan_ranks": 0, "loss_rank_avg": 0.5054548978805542, "step": 1150, "valid_targets_mean": 2973.6, "valid_targets_min": 614 }, { "epoch": 1.8479999999999999, "grad_norm": 0.569099812286609, "learning_rate": 3.180265513482345e-05, "loss": 0.4843, "loss_nan_ranks": 0, "loss_rank_avg": 0.5099635720252991, "step": 1155, "valid_targets_mean": 4445.8, "valid_targets_min": 1057 }, { "epoch": 1.8559999999999999, "grad_norm": 0.7118854281438147, "learning_rate": 3.171227845544143e-05, "loss": 0.5015, "loss_nan_ranks": 0, "loss_rank_avg": 0.5411723852157593, "step": 1160, "valid_targets_mean": 3062.9, "valid_targets_min": 1004 }, { "epoch": 1.8639999999999999, "grad_norm": 0.4667067376045219, "learning_rate": 3.162153630803877e-05, "loss": 0.4776, "loss_nan_ranks": 0, "loss_rank_avg": 0.42969876527786255, "step": 1165, "valid_targets_mean": 6657.2, "valid_targets_min": 701 }, { "epoch": 1.8719999999999999, "grad_norm": 0.5574951952526179, "learning_rate": 3.153043152411861e-05, "loss": 0.4466, "loss_nan_ranks": 0, "loss_rank_avg": 0.3883597254753113, "step": 1170, "valid_targets_mean": 3929.1, "valid_targets_min": 714 }, { "epoch": 1.88, "grad_norm": 0.49857166985641654, "learning_rate": 3.14389669464997e-05, "loss": 0.4621, "loss_nan_ranks": 0, "loss_rank_avg": 0.4549972712993622, "step": 1175, "valid_targets_mean": 5751.6, "valid_targets_min": 602 }, { "epoch": 1.888, "grad_norm": 0.7211688221308987, "learning_rate": 3.134714542922777e-05, "loss": 0.4731, "loss_nan_ranks": 0, "loss_rank_avg": 0.4644148647785187, "step": 1180, "valid_targets_mean": 4079.8, "valid_targets_min": 539 }, { "epoch": 1.896, "grad_norm": 0.5649019306883653, "learning_rate": 3.1254969837486425e-05, "loss": 0.4801, "loss_nan_ranks": 0, "loss_rank_avg": 0.47578728199005127, "step": 1185, "valid_targets_mean": 4451.9, "valid_targets_min": 496 }, { "epoch": 1.904, "grad_norm": 0.8113490782507405, "learning_rate": 3.116244304750774e-05, "loss": 0.4841, "loss_nan_ranks": 0, "loss_rank_avg": 0.49812304973602295, "step": 1190, "valid_targets_mean": 4256.6, "valid_targets_min": 785 }, { "epoch": 1.912, "grad_norm": 0.6475596871318647, "learning_rate": 3.106956794648254e-05, "loss": 0.4662, "loss_nan_ranks": 0, "loss_rank_avg": 0.4609223008155823, "step": 1195, "valid_targets_mean": 3238.9, "valid_targets_min": 746 }, { "epoch": 1.92, "grad_norm": 0.5392636163279259, "learning_rate": 3.097634743247026e-05, "loss": 0.4366, "loss_nan_ranks": 0, "loss_rank_avg": 0.4826831519603729, "step": 1200, "valid_targets_mean": 4662.3, "valid_targets_min": 553 }, { "epoch": 1.928, "grad_norm": 0.61215815551841, "learning_rate": 3.08827844143086e-05, "loss": 0.4761, "loss_nan_ranks": 0, "loss_rank_avg": 0.5427641868591309, "step": 1205, "valid_targets_mean": 4324.7, "valid_targets_min": 824 }, { "epoch": 1.936, "grad_norm": 0.5987033728383583, "learning_rate": 3.078888181152264e-05, "loss": 0.4722, "loss_nan_ranks": 0, "loss_rank_avg": 0.42406606674194336, "step": 1210, "valid_targets_mean": 3636.4, "valid_targets_min": 684 }, { "epoch": 1.944, "grad_norm": 0.6515070925773007, "learning_rate": 3.0694642554233855e-05, "loss": 0.4707, "loss_nan_ranks": 0, "loss_rank_avg": 0.47039297223091125, "step": 1215, "valid_targets_mean": 2975.4, "valid_targets_min": 592 }, { "epoch": 1.952, "grad_norm": 0.5388317669629852, "learning_rate": 3.0600069583068594e-05, "loss": 0.4779, "loss_nan_ranks": 0, "loss_rank_avg": 0.4415266513824463, "step": 1220, "valid_targets_mean": 4852.2, "valid_targets_min": 347 }, { "epoch": 1.96, "grad_norm": 0.5138321633547656, "learning_rate": 3.0505165849066394e-05, "loss": 0.4681, "loss_nan_ranks": 0, "loss_rank_avg": 0.4434703290462494, "step": 1225, "valid_targets_mean": 5056.9, "valid_targets_min": 907 }, { "epoch": 1.968, "grad_norm": 0.7612326702075074, "learning_rate": 3.040993431358782e-05, "loss": 0.4736, "loss_nan_ranks": 0, "loss_rank_avg": 0.4785926938056946, "step": 1230, "valid_targets_mean": 2302.5, "valid_targets_min": 845 }, { "epoch": 1.976, "grad_norm": 0.7430168909983449, "learning_rate": 3.031437794822215e-05, "loss": 0.4701, "loss_nan_ranks": 0, "loss_rank_avg": 0.46587690711021423, "step": 1235, "valid_targets_mean": 3480.7, "valid_targets_min": 776 }, { "epoch": 1.984, "grad_norm": 0.8871840326954018, "learning_rate": 3.021849973469455e-05, "loss": 0.5202, "loss_nan_ranks": 0, "loss_rank_avg": 0.5849260687828064, "step": 1240, "valid_targets_mean": 2373.0, "valid_targets_min": 824 }, { "epoch": 1.992, "grad_norm": 0.8005852985566412, "learning_rate": 3.012230266477313e-05, "loss": 0.4786, "loss_nan_ranks": 0, "loss_rank_avg": 0.5191680788993835, "step": 1245, "valid_targets_mean": 2152.9, "valid_targets_min": 834 }, { "epoch": 2.0, "grad_norm": 1.423544343942953, "learning_rate": 3.0025789740175502e-05, "loss": 0.4934, "loss_nan_ranks": 0, "loss_rank_avg": 0.5457210540771484, "step": 1250, "valid_targets_mean": 2222.7, "valid_targets_min": 561 }, { "epoch": 2.008, "grad_norm": 0.5955977900794318, "learning_rate": 2.9928963972475186e-05, "loss": 0.4681, "loss_nan_ranks": 0, "loss_rank_avg": 0.4416769742965698, "step": 1255, "valid_targets_mean": 4610.1, "valid_targets_min": 615 }, { "epoch": 2.016, "grad_norm": 0.6090260458793183, "learning_rate": 2.9831828383007585e-05, "loss": 0.4479, "loss_nan_ranks": 0, "loss_rank_avg": 0.4574715495109558, "step": 1260, "valid_targets_mean": 4182.2, "valid_targets_min": 645 }, { "epoch": 2.024, "grad_norm": 0.7360060643061078, "learning_rate": 2.9734386002775754e-05, "loss": 0.4695, "loss_nan_ranks": 0, "loss_rank_avg": 0.5532656908035278, "step": 1265, "valid_targets_mean": 3207.1, "valid_targets_min": 512 }, { "epoch": 2.032, "grad_norm": 0.6687884359084095, "learning_rate": 2.963663987235577e-05, "loss": 0.4552, "loss_nan_ranks": 0, "loss_rank_avg": 0.426003634929657, "step": 1270, "valid_targets_mean": 2995.4, "valid_targets_min": 730 }, { "epoch": 2.04, "grad_norm": 0.7116484185211367, "learning_rate": 2.95385930418019e-05, "loss": 0.4567, "loss_nan_ranks": 0, "loss_rank_avg": 0.5189186334609985, "step": 1275, "valid_targets_mean": 3110.1, "valid_targets_min": 742 }, { "epoch": 2.048, "grad_norm": 0.7186019588818685, "learning_rate": 2.9440248570551406e-05, "loss": 0.4459, "loss_nan_ranks": 0, "loss_rank_avg": 0.41865086555480957, "step": 1280, "valid_targets_mean": 3671.8, "valid_targets_min": 906 }, { "epoch": 2.056, "grad_norm": 1.4747315343165368, "learning_rate": 2.934160952732907e-05, "loss": 0.449, "loss_nan_ranks": 0, "loss_rank_avg": 0.4162527322769165, "step": 1285, "valid_targets_mean": 3745.3, "valid_targets_min": 306 }, { "epoch": 2.064, "grad_norm": 0.6471086351380495, "learning_rate": 2.9242678990051462e-05, "loss": 0.4433, "loss_nan_ranks": 0, "loss_rank_avg": 0.44822385907173157, "step": 1290, "valid_targets_mean": 4509.1, "valid_targets_min": 576 }, { "epoch": 2.072, "grad_norm": 0.6346182269430595, "learning_rate": 2.9143460045730886e-05, "loss": 0.4487, "loss_nan_ranks": 0, "loss_rank_avg": 0.48546066880226135, "step": 1295, "valid_targets_mean": 3940.9, "valid_targets_min": 876 }, { "epoch": 2.08, "grad_norm": 2.031709545540969, "learning_rate": 2.9043955790379035e-05, "loss": 0.4714, "loss_nan_ranks": 0, "loss_rank_avg": 0.43055105209350586, "step": 1300, "valid_targets_mean": 3915.9, "valid_targets_min": 635 }, { "epoch": 2.088, "grad_norm": 0.6903051949163922, "learning_rate": 2.8944169328910427e-05, "loss": 0.446, "loss_nan_ranks": 0, "loss_rank_avg": 0.48170751333236694, "step": 1305, "valid_targets_mean": 3417.4, "valid_targets_min": 681 }, { "epoch": 2.096, "grad_norm": 0.5332653284325558, "learning_rate": 2.884410377504547e-05, "loss": 0.4159, "loss_nan_ranks": 0, "loss_rank_avg": 0.42663711309432983, "step": 1310, "valid_targets_mean": 5080.2, "valid_targets_min": 640 }, { "epoch": 2.104, "grad_norm": 0.602437990002518, "learning_rate": 2.8743762251213333e-05, "loss": 0.4508, "loss_nan_ranks": 0, "loss_rank_avg": 0.47259843349456787, "step": 1315, "valid_targets_mean": 4112.4, "valid_targets_min": 646 }, { "epoch": 2.112, "grad_norm": 0.676214101245154, "learning_rate": 2.8643147888454507e-05, "loss": 0.4432, "loss_nan_ranks": 0, "loss_rank_avg": 0.4584727883338928, "step": 1320, "valid_targets_mean": 3043.8, "valid_targets_min": 415 }, { "epoch": 2.12, "grad_norm": 0.7428353700968319, "learning_rate": 2.854226382632312e-05, "loss": 0.4713, "loss_nan_ranks": 0, "loss_rank_avg": 0.5539952516555786, "step": 1325, "valid_targets_mean": 3083.6, "valid_targets_min": 624 }, { "epoch": 2.128, "grad_norm": 0.5464368058251405, "learning_rate": 2.844111321278893e-05, "loss": 0.4385, "loss_nan_ranks": 0, "loss_rank_avg": 0.3453948497772217, "step": 1330, "valid_targets_mean": 3928.3, "valid_targets_min": 553 }, { "epoch": 2.136, "grad_norm": 0.5772508201873505, "learning_rate": 2.833969920413913e-05, "loss": 0.4465, "loss_nan_ranks": 0, "loss_rank_avg": 0.4165865182876587, "step": 1335, "valid_targets_mean": 3898.2, "valid_targets_min": 480 }, { "epoch": 2.144, "grad_norm": 0.6774356448413013, "learning_rate": 2.8238024964879857e-05, "loss": 0.4465, "loss_nan_ranks": 0, "loss_rank_avg": 0.4822053015232086, "step": 1340, "valid_targets_mean": 3180.1, "valid_targets_min": 686 }, { "epoch": 2.152, "grad_norm": 0.654073300893228, "learning_rate": 2.8136093667637438e-05, "loss": 0.4496, "loss_nan_ranks": 0, "loss_rank_avg": 0.4197562336921692, "step": 1345, "valid_targets_mean": 2752.7, "valid_targets_min": 518 }, { "epoch": 2.16, "grad_norm": 0.7260228311473961, "learning_rate": 2.8033908493059394e-05, "loss": 0.4643, "loss_nan_ranks": 0, "loss_rank_avg": 0.4236224591732025, "step": 1350, "valid_targets_mean": 3862.9, "valid_targets_min": 977 }, { "epoch": 2.168, "grad_norm": 0.5308213424796718, "learning_rate": 2.793147262971519e-05, "loss": 0.4484, "loss_nan_ranks": 0, "loss_rank_avg": 0.4658902883529663, "step": 1355, "valid_targets_mean": 5550.6, "valid_targets_min": 763 }, { "epoch": 2.176, "grad_norm": 0.5816937352471572, "learning_rate": 2.7828789273996748e-05, "loss": 0.4431, "loss_nan_ranks": 0, "loss_rank_avg": 0.414844274520874, "step": 1360, "valid_targets_mean": 4192.6, "valid_targets_min": 643 }, { "epoch": 2.184, "grad_norm": 0.6690509201863886, "learning_rate": 2.7725861630018703e-05, "loss": 0.4441, "loss_nan_ranks": 0, "loss_rank_avg": 0.41888707876205444, "step": 1365, "valid_targets_mean": 3258.7, "valid_targets_min": 857 }, { "epoch": 2.192, "grad_norm": 0.5315576725263342, "learning_rate": 2.7622692909518423e-05, "loss": 0.4088, "loss_nan_ranks": 0, "loss_rank_avg": 0.4389989376068115, "step": 1370, "valid_targets_mean": 5194.4, "valid_targets_min": 651 }, { "epoch": 2.2, "grad_norm": 0.43861560505715885, "learning_rate": 2.7519286331755766e-05, "loss": 0.4343, "loss_nan_ranks": 0, "loss_rank_avg": 0.36606454849243164, "step": 1375, "valid_targets_mean": 7803.1, "valid_targets_min": 915 }, { "epoch": 2.208, "grad_norm": 0.6860438916170236, "learning_rate": 2.7415645123412672e-05, "loss": 0.4345, "loss_nan_ranks": 0, "loss_rank_avg": 0.4727722704410553, "step": 1380, "valid_targets_mean": 3681.9, "valid_targets_min": 354 }, { "epoch": 2.216, "grad_norm": 0.6233752088365587, "learning_rate": 2.731177251849246e-05, "loss": 0.4469, "loss_nan_ranks": 0, "loss_rank_avg": 0.45124104619026184, "step": 1385, "valid_targets_mean": 3928.4, "valid_targets_min": 409 }, { "epoch": 2.224, "grad_norm": 0.7054017714889622, "learning_rate": 2.7207671758218884e-05, "loss": 0.4511, "loss_nan_ranks": 0, "loss_rank_avg": 0.5190399885177612, "step": 1390, "valid_targets_mean": 3278.7, "valid_targets_min": 668 }, { "epoch": 2.232, "grad_norm": 0.5481452159873532, "learning_rate": 2.710334609093504e-05, "loss": 0.4338, "loss_nan_ranks": 0, "loss_rank_avg": 0.40331241488456726, "step": 1395, "valid_targets_mean": 4237.9, "valid_targets_min": 929 }, { "epoch": 2.24, "grad_norm": 0.5209483042162194, "learning_rate": 2.699879877200198e-05, "loss": 0.4247, "loss_nan_ranks": 0, "loss_rank_avg": 0.367615282535553, "step": 1400, "valid_targets_mean": 4907.4, "valid_targets_min": 718 }, { "epoch": 2.248, "grad_norm": 0.6180610305765888, "learning_rate": 2.6894033063697143e-05, "loss": 0.4649, "loss_nan_ranks": 0, "loss_rank_avg": 0.4836103916168213, "step": 1405, "valid_targets_mean": 4520.9, "valid_targets_min": 778 }, { "epoch": 2.2560000000000002, "grad_norm": 0.624263121954133, "learning_rate": 2.6789052235112554e-05, "loss": 0.4516, "loss_nan_ranks": 0, "loss_rank_avg": 0.4151955544948578, "step": 1410, "valid_targets_mean": 3549.2, "valid_targets_min": 739 }, { "epoch": 2.2640000000000002, "grad_norm": 0.5255415368728256, "learning_rate": 2.66838595620528e-05, "loss": 0.4243, "loss_nan_ranks": 0, "loss_rank_avg": 0.4413053095340729, "step": 1415, "valid_targets_mean": 5782.1, "valid_targets_min": 655 }, { "epoch": 2.2720000000000002, "grad_norm": 0.6397618135971025, "learning_rate": 2.6578458326932842e-05, "loss": 0.4536, "loss_nan_ranks": 0, "loss_rank_avg": 0.44738149642944336, "step": 1420, "valid_targets_mean": 3827.2, "valid_targets_min": 505 }, { "epoch": 2.2800000000000002, "grad_norm": 0.7085382486155277, "learning_rate": 2.6472851818675583e-05, "loss": 0.4633, "loss_nan_ranks": 0, "loss_rank_avg": 0.4663681387901306, "step": 1425, "valid_targets_mean": 2644.9, "valid_targets_min": 646 }, { "epoch": 2.288, "grad_norm": 0.9979890243748089, "learning_rate": 2.6367043332609223e-05, "loss": 0.4459, "loss_nan_ranks": 0, "loss_rank_avg": 0.4871729016304016, "step": 1430, "valid_targets_mean": 1448.7, "valid_targets_min": 744 }, { "epoch": 2.296, "grad_norm": 0.5174927037809555, "learning_rate": 2.6261036170364448e-05, "loss": 0.4251, "loss_nan_ranks": 0, "loss_rank_avg": 0.43367236852645874, "step": 1435, "valid_targets_mean": 5433.6, "valid_targets_min": 783 }, { "epoch": 2.304, "grad_norm": 0.5663046987401196, "learning_rate": 2.6154833639771415e-05, "loss": 0.4422, "loss_nan_ranks": 0, "loss_rank_avg": 0.46524518728256226, "step": 1440, "valid_targets_mean": 5141.4, "valid_targets_min": 730 }, { "epoch": 2.312, "grad_norm": 0.5337110127394135, "learning_rate": 2.6048439054756492e-05, "loss": 0.4463, "loss_nan_ranks": 0, "loss_rank_avg": 0.4362761676311493, "step": 1445, "valid_targets_mean": 5409.9, "valid_targets_min": 1159 }, { "epoch": 2.32, "grad_norm": 0.5463782845574509, "learning_rate": 2.594185573523892e-05, "loss": 0.4565, "loss_nan_ranks": 0, "loss_rank_avg": 0.45417675375938416, "step": 1450, "valid_targets_mean": 4838.8, "valid_targets_min": 619 }, { "epoch": 2.328, "grad_norm": 0.6942365338980392, "learning_rate": 2.583508700702716e-05, "loss": 0.4481, "loss_nan_ranks": 0, "loss_rank_avg": 0.4617985486984253, "step": 1455, "valid_targets_mean": 2802.6, "valid_targets_min": 1014 }, { "epoch": 2.336, "grad_norm": 0.6070000633127984, "learning_rate": 2.572813620171513e-05, "loss": 0.4513, "loss_nan_ranks": 0, "loss_rank_avg": 0.42797911167144775, "step": 1460, "valid_targets_mean": 3719.3, "valid_targets_min": 635 }, { "epoch": 2.344, "grad_norm": 0.4753849685324334, "learning_rate": 2.5621006656578267e-05, "loss": 0.4213, "loss_nan_ranks": 0, "loss_rank_avg": 0.3997453451156616, "step": 1465, "valid_targets_mean": 5544.6, "valid_targets_min": 823 }, { "epoch": 2.352, "grad_norm": 0.5989976974737479, "learning_rate": 2.5513701714469373e-05, "loss": 0.4413, "loss_nan_ranks": 0, "loss_rank_avg": 0.4619044363498688, "step": 1470, "valid_targets_mean": 4001.4, "valid_targets_min": 672 }, { "epoch": 2.36, "grad_norm": 0.5487842792272467, "learning_rate": 2.540622472371429e-05, "loss": 0.4343, "loss_nan_ranks": 0, "loss_rank_avg": 0.4185061454772949, "step": 1475, "valid_targets_mean": 4278.5, "valid_targets_min": 426 }, { "epoch": 2.368, "grad_norm": 0.4402218121898237, "learning_rate": 2.5298579038007478e-05, "loss": 0.4342, "loss_nan_ranks": 0, "loss_rank_avg": 0.3919063210487366, "step": 1480, "valid_targets_mean": 6910.7, "valid_targets_min": 815 }, { "epoch": 2.376, "grad_norm": 0.7373247521439833, "learning_rate": 2.519076801630727e-05, "loss": 0.4723, "loss_nan_ranks": 0, "loss_rank_avg": 0.5013588070869446, "step": 1485, "valid_targets_mean": 2875.9, "valid_targets_min": 658 }, { "epoch": 2.384, "grad_norm": 0.556610693165223, "learning_rate": 2.508279502273117e-05, "loss": 0.4419, "loss_nan_ranks": 0, "loss_rank_avg": 0.43320780992507935, "step": 1490, "valid_targets_mean": 5407.4, "valid_targets_min": 836 }, { "epoch": 2.392, "grad_norm": 0.5098627681422288, "learning_rate": 2.4974663426450798e-05, "loss": 0.4392, "loss_nan_ranks": 0, "loss_rank_avg": 0.4344981908798218, "step": 1495, "valid_targets_mean": 5123.3, "valid_targets_min": 601 }, { "epoch": 2.4, "grad_norm": 0.5039771982978674, "learning_rate": 2.4866376601586798e-05, "loss": 0.4464, "loss_nan_ranks": 0, "loss_rank_avg": 0.4104907810688019, "step": 1500, "valid_targets_mean": 5579.6, "valid_targets_min": 754 }, { "epoch": 2.408, "grad_norm": 0.5096044181569787, "learning_rate": 2.475793792710352e-05, "loss": 0.4174, "loss_nan_ranks": 0, "loss_rank_avg": 0.4063170552253723, "step": 1505, "valid_targets_mean": 5081.4, "valid_targets_min": 488 }, { "epoch": 2.416, "grad_norm": 0.7479128529316125, "learning_rate": 2.4649350786703637e-05, "loss": 0.448, "loss_nan_ranks": 0, "loss_rank_avg": 0.47363007068634033, "step": 1510, "valid_targets_mean": 2561.8, "valid_targets_min": 708 }, { "epoch": 2.424, "grad_norm": 0.5885671758624585, "learning_rate": 2.45406185687225e-05, "loss": 0.4514, "loss_nan_ranks": 0, "loss_rank_avg": 0.44971561431884766, "step": 1515, "valid_targets_mean": 4042.2, "valid_targets_min": 678 }, { "epoch": 2.432, "grad_norm": 0.5915561518938581, "learning_rate": 2.443174466602246e-05, "loss": 0.4559, "loss_nan_ranks": 0, "loss_rank_avg": 0.4462353587150574, "step": 1520, "valid_targets_mean": 3938.5, "valid_targets_min": 765 }, { "epoch": 2.44, "grad_norm": 0.663840439463274, "learning_rate": 2.4322732475886953e-05, "loss": 0.4457, "loss_nan_ranks": 0, "loss_rank_avg": 0.426447331905365, "step": 1525, "valid_targets_mean": 2940.4, "valid_targets_min": 494 }, { "epoch": 2.448, "grad_norm": 0.5258251709290092, "learning_rate": 2.4213585399914528e-05, "loss": 0.4099, "loss_nan_ranks": 0, "loss_rank_avg": 0.4359118640422821, "step": 1530, "valid_targets_mean": 4996.3, "valid_targets_min": 832 }, { "epoch": 2.456, "grad_norm": 0.5279872310300597, "learning_rate": 2.4104306843912687e-05, "loss": 0.4368, "loss_nan_ranks": 0, "loss_rank_avg": 0.4399365186691284, "step": 1535, "valid_targets_mean": 5331.9, "valid_targets_min": 521 }, { "epoch": 2.464, "grad_norm": 0.5035482920980331, "learning_rate": 2.3994900217791615e-05, "loss": 0.4564, "loss_nan_ranks": 0, "loss_rank_avg": 0.4546802341938019, "step": 1540, "valid_targets_mean": 6091.1, "valid_targets_min": 553 }, { "epoch": 2.472, "grad_norm": 0.5503280071758032, "learning_rate": 2.3885368935457762e-05, "loss": 0.4442, "loss_nan_ranks": 0, "loss_rank_avg": 0.38341644406318665, "step": 1545, "valid_targets_mean": 4704.2, "valid_targets_min": 534 }, { "epoch": 2.48, "grad_norm": 0.5122727644758672, "learning_rate": 2.3775716414707355e-05, "loss": 0.4236, "loss_nan_ranks": 0, "loss_rank_avg": 0.39771783351898193, "step": 1550, "valid_targets_mean": 5978.4, "valid_targets_min": 737 }, { "epoch": 2.488, "grad_norm": 0.577359891725372, "learning_rate": 2.36659460771197e-05, "loss": 0.4408, "loss_nan_ranks": 0, "loss_rank_avg": 0.426598459482193, "step": 1555, "valid_targets_mean": 4298.5, "valid_targets_min": 758 }, { "epoch": 2.496, "grad_norm": 0.534664454111424, "learning_rate": 2.3556061347950455e-05, "loss": 0.422, "loss_nan_ranks": 0, "loss_rank_avg": 0.41438737511634827, "step": 1560, "valid_targets_mean": 4637.9, "valid_targets_min": 1024 }, { "epoch": 2.504, "grad_norm": 0.742643154280012, "learning_rate": 2.3446065656024734e-05, "loss": 0.4412, "loss_nan_ranks": 0, "loss_rank_avg": 0.45946621894836426, "step": 1565, "valid_targets_mean": 2557.0, "valid_targets_min": 568 }, { "epoch": 2.512, "grad_norm": 0.5946087395842765, "learning_rate": 2.33359624336301e-05, "loss": 0.4429, "loss_nan_ranks": 0, "loss_rank_avg": 0.44135582447052, "step": 1570, "valid_targets_mean": 3980.1, "valid_targets_min": 614 }, { "epoch": 2.52, "grad_norm": 0.7692556639202142, "learning_rate": 2.3225755116409497e-05, "loss": 0.4855, "loss_nan_ranks": 0, "loss_rank_avg": 0.49565717577934265, "step": 1575, "valid_targets_mean": 2437.8, "valid_targets_min": 702 }, { "epoch": 2.528, "grad_norm": 0.6518050042070153, "learning_rate": 2.311544714325403e-05, "loss": 0.4278, "loss_nan_ranks": 0, "loss_rank_avg": 0.4329824149608612, "step": 1580, "valid_targets_mean": 3308.9, "valid_targets_min": 837 }, { "epoch": 2.536, "grad_norm": 0.7001492648412008, "learning_rate": 2.300504195619563e-05, "loss": 0.4865, "loss_nan_ranks": 0, "loss_rank_avg": 0.47200626134872437, "step": 1585, "valid_targets_mean": 3019.2, "valid_targets_min": 723 }, { "epoch": 2.544, "grad_norm": 0.5775943044286223, "learning_rate": 2.2894543000299697e-05, "loss": 0.4546, "loss_nan_ranks": 0, "loss_rank_avg": 0.4556680917739868, "step": 1590, "valid_targets_mean": 4622.2, "valid_targets_min": 746 }, { "epoch": 2.552, "grad_norm": 0.6300239166139451, "learning_rate": 2.2783953723557572e-05, "loss": 0.4442, "loss_nan_ranks": 0, "loss_rank_avg": 0.47904396057128906, "step": 1595, "valid_targets_mean": 3739.5, "valid_targets_min": 691 }, { "epoch": 2.56, "grad_norm": 0.5521871360992379, "learning_rate": 2.2673277576778946e-05, "loss": 0.4353, "loss_nan_ranks": 0, "loss_rank_avg": 0.4208674728870392, "step": 1600, "valid_targets_mean": 4564.6, "valid_targets_min": 555 }, { "epoch": 2.568, "grad_norm": 0.6820540018619283, "learning_rate": 2.2562518013484208e-05, "loss": 0.4273, "loss_nan_ranks": 0, "loss_rank_avg": 0.40282294154167175, "step": 1605, "valid_targets_mean": 2706.5, "valid_targets_min": 797 }, { "epoch": 2.576, "grad_norm": 0.6077922915119577, "learning_rate": 2.245167848979664e-05, "loss": 0.4142, "loss_nan_ranks": 0, "loss_rank_avg": 0.42859092354774475, "step": 1610, "valid_targets_mean": 3625.1, "valid_targets_min": 509 }, { "epoch": 2.584, "grad_norm": 0.5854962148493413, "learning_rate": 2.23407624643346e-05, "loss": 0.4648, "loss_nan_ranks": 0, "loss_rank_avg": 0.42316490411758423, "step": 1615, "valid_targets_mean": 4010.8, "valid_targets_min": 713 }, { "epoch": 2.592, "grad_norm": 0.544244197674275, "learning_rate": 2.2229773398103606e-05, "loss": 0.4167, "loss_nan_ranks": 0, "loss_rank_avg": 0.40714818239212036, "step": 1620, "valid_targets_mean": 4601.6, "valid_targets_min": 847 }, { "epoch": 2.6, "grad_norm": 0.7315101815116987, "learning_rate": 2.2118714754388323e-05, "loss": 0.4689, "loss_nan_ranks": 0, "loss_rank_avg": 0.4557625651359558, "step": 1625, "valid_targets_mean": 2673.6, "valid_targets_min": 627 }, { "epoch": 2.608, "grad_norm": 0.6392348215128343, "learning_rate": 2.200758999864449e-05, "loss": 0.4095, "loss_nan_ranks": 0, "loss_rank_avg": 0.41079527139663696, "step": 1630, "valid_targets_mean": 3180.8, "valid_targets_min": 492 }, { "epoch": 2.616, "grad_norm": 0.553837921244433, "learning_rate": 2.1896402598390818e-05, "loss": 0.419, "loss_nan_ranks": 0, "loss_rank_avg": 0.3827151358127594, "step": 1635, "valid_targets_mean": 4638.4, "valid_targets_min": 616 }, { "epoch": 2.624, "grad_norm": 0.5193517813868557, "learning_rate": 2.178515602310074e-05, "loss": 0.4533, "loss_nan_ranks": 0, "loss_rank_avg": 0.40580642223358154, "step": 1640, "valid_targets_mean": 4868.9, "valid_targets_min": 799 }, { "epoch": 2.632, "grad_norm": 0.6317219989613208, "learning_rate": 2.1673853744094193e-05, "loss": 0.4467, "loss_nan_ranks": 0, "loss_rank_avg": 0.4618436098098755, "step": 1645, "valid_targets_mean": 3528.4, "valid_targets_min": 583 }, { "epoch": 2.64, "grad_norm": 0.8058706372384056, "learning_rate": 2.1562499234429283e-05, "loss": 0.4757, "loss_nan_ranks": 0, "loss_rank_avg": 0.4898325800895691, "step": 1650, "valid_targets_mean": 2231.3, "valid_targets_min": 507 }, { "epoch": 2.648, "grad_norm": 0.7352262306367001, "learning_rate": 2.1451095968793908e-05, "loss": 0.4389, "loss_nan_ranks": 0, "loss_rank_avg": 0.46522995829582214, "step": 1655, "valid_targets_mean": 3700.2, "valid_targets_min": 542 }, { "epoch": 2.656, "grad_norm": 0.6500232390169828, "learning_rate": 2.1339647423397337e-05, "loss": 0.4385, "loss_nan_ranks": 0, "loss_rank_avg": 0.4323245882987976, "step": 1660, "valid_targets_mean": 3278.4, "valid_targets_min": 774 }, { "epoch": 2.664, "grad_norm": 0.6031196836473796, "learning_rate": 2.122815707586176e-05, "loss": 0.4358, "loss_nan_ranks": 0, "loss_rank_avg": 0.4227977991104126, "step": 1665, "valid_targets_mean": 3835.8, "valid_targets_min": 517 }, { "epoch": 2.672, "grad_norm": 0.5631354627031511, "learning_rate": 2.111662840511373e-05, "loss": 0.4372, "loss_nan_ranks": 0, "loss_rank_avg": 0.40425512194633484, "step": 1670, "valid_targets_mean": 4473.6, "valid_targets_min": 591 }, { "epoch": 2.68, "grad_norm": 0.6281174285703458, "learning_rate": 2.1005064891275638e-05, "loss": 0.4534, "loss_nan_ranks": 0, "loss_rank_avg": 0.4625401496887207, "step": 1675, "valid_targets_mean": 3325.9, "valid_targets_min": 618 }, { "epoch": 2.6879999999999997, "grad_norm": 0.5324007779451334, "learning_rate": 2.0893470015557126e-05, "loss": 0.4137, "loss_nan_ranks": 0, "loss_rank_avg": 0.3770933151245117, "step": 1680, "valid_targets_mean": 4407.2, "valid_targets_min": 854 }, { "epoch": 2.6959999999999997, "grad_norm": 0.5704221627115216, "learning_rate": 2.078184726014643e-05, "loss": 0.4524, "loss_nan_ranks": 0, "loss_rank_avg": 0.4448625445365906, "step": 1685, "valid_targets_mean": 4212.2, "valid_targets_min": 669 }, { "epoch": 2.7039999999999997, "grad_norm": 0.5813144458895672, "learning_rate": 2.0670200108101754e-05, "loss": 0.4548, "loss_nan_ranks": 0, "loss_rank_avg": 0.4791436195373535, "step": 1690, "valid_targets_mean": 4126.9, "valid_targets_min": 473 }, { "epoch": 2.7119999999999997, "grad_norm": 0.8782653466918007, "learning_rate": 2.0558532043242557e-05, "loss": 0.464, "loss_nan_ranks": 0, "loss_rank_avg": 0.453347384929657, "step": 1695, "valid_targets_mean": 1782.1, "valid_targets_min": 564 }, { "epoch": 2.7199999999999998, "grad_norm": 0.5583160859664104, "learning_rate": 2.0446846550040863e-05, "loss": 0.428, "loss_nan_ranks": 0, "loss_rank_avg": 0.4322124719619751, "step": 1700, "valid_targets_mean": 4821.1, "valid_targets_min": 669 }, { "epoch": 2.7279999999999998, "grad_norm": 0.5277977577152152, "learning_rate": 2.033514711351253e-05, "loss": 0.4315, "loss_nan_ranks": 0, "loss_rank_avg": 0.42123162746429443, "step": 1705, "valid_targets_mean": 5344.1, "valid_targets_min": 709 }, { "epoch": 2.7359999999999998, "grad_norm": 0.498988724319745, "learning_rate": 2.022343721910851e-05, "loss": 0.4515, "loss_nan_ranks": 0, "loss_rank_avg": 0.44440481066703796, "step": 1710, "valid_targets_mean": 6055.7, "valid_targets_min": 701 }, { "epoch": 2.7439999999999998, "grad_norm": 0.5688318880815759, "learning_rate": 2.0111720352606054e-05, "loss": 0.4293, "loss_nan_ranks": 0, "loss_rank_avg": 0.4387132227420807, "step": 1715, "valid_targets_mean": 4293.4, "valid_targets_min": 1000 }, { "epoch": 2.752, "grad_norm": 0.5541189383448685, "learning_rate": 2e-05, "loss": 0.4345, "loss_nan_ranks": 0, "loss_rank_avg": 0.42854511737823486, "step": 1720, "valid_targets_mean": 4060.4, "valid_targets_min": 1006 }, { "epoch": 2.76, "grad_norm": 0.5636891444562256, "learning_rate": 1.988827964739395e-05, "loss": 0.4569, "loss_nan_ranks": 0, "loss_rank_avg": 0.4016704559326172, "step": 1725, "valid_targets_mean": 4026.6, "valid_targets_min": 556 }, { "epoch": 2.768, "grad_norm": 0.5707069098467574, "learning_rate": 1.9776562780891494e-05, "loss": 0.4277, "loss_nan_ranks": 0, "loss_rank_avg": 0.3943563997745514, "step": 1730, "valid_targets_mean": 3987.3, "valid_targets_min": 769 }, { "epoch": 2.776, "grad_norm": 0.46274734962368136, "learning_rate": 1.966485288648747e-05, "loss": 0.4408, "loss_nan_ranks": 0, "loss_rank_avg": 0.38267430663108826, "step": 1735, "valid_targets_mean": 6487.2, "valid_targets_min": 785 }, { "epoch": 2.784, "grad_norm": 0.5235450558438844, "learning_rate": 1.9553153449959144e-05, "loss": 0.4465, "loss_nan_ranks": 0, "loss_rank_avg": 0.40462425351142883, "step": 1740, "valid_targets_mean": 4937.6, "valid_targets_min": 741 }, { "epoch": 2.792, "grad_norm": 0.4835906686418437, "learning_rate": 1.9441467956757453e-05, "loss": 0.4716, "loss_nan_ranks": 0, "loss_rank_avg": 0.4714353680610657, "step": 1745, "valid_targets_mean": 6360.5, "valid_targets_min": 654 }, { "epoch": 2.8, "grad_norm": 0.5100225254528137, "learning_rate": 1.9329799891898256e-05, "loss": 0.4314, "loss_nan_ranks": 0, "loss_rank_avg": 0.4558408260345459, "step": 1750, "valid_targets_mean": 5252.3, "valid_targets_min": 707 }, { "epoch": 2.808, "grad_norm": 0.7039108297346979, "learning_rate": 1.9218152739853576e-05, "loss": 0.4289, "loss_nan_ranks": 0, "loss_rank_avg": 0.3978843688964844, "step": 1755, "valid_targets_mean": 4321.9, "valid_targets_min": 725 }, { "epoch": 2.816, "grad_norm": 0.5642168770075947, "learning_rate": 1.9106529984442884e-05, "loss": 0.4398, "loss_nan_ranks": 0, "loss_rank_avg": 0.45182371139526367, "step": 1760, "valid_targets_mean": 4244.4, "valid_targets_min": 552 }, { "epoch": 2.824, "grad_norm": 0.6525684551205236, "learning_rate": 1.8994935108724366e-05, "loss": 0.4521, "loss_nan_ranks": 0, "loss_rank_avg": 0.4625537395477295, "step": 1765, "valid_targets_mean": 3384.4, "valid_targets_min": 838 }, { "epoch": 2.832, "grad_norm": 0.5604654454033197, "learning_rate": 1.8883371594886276e-05, "loss": 0.4497, "loss_nan_ranks": 0, "loss_rank_avg": 0.4158444404602051, "step": 1770, "valid_targets_mean": 5318.2, "valid_targets_min": 598 }, { "epoch": 2.84, "grad_norm": 0.7805033021518426, "learning_rate": 1.877184292413824e-05, "loss": 0.4403, "loss_nan_ranks": 0, "loss_rank_avg": 0.46476149559020996, "step": 1775, "valid_targets_mean": 2218.4, "valid_targets_min": 762 }, { "epoch": 2.848, "grad_norm": 0.632248312453592, "learning_rate": 1.8660352576602663e-05, "loss": 0.4715, "loss_nan_ranks": 0, "loss_rank_avg": 0.4902727007865906, "step": 1780, "valid_targets_mean": 4020.6, "valid_targets_min": 848 }, { "epoch": 2.856, "grad_norm": 0.8500750674610906, "learning_rate": 1.8548904031206102e-05, "loss": 0.4558, "loss_nan_ranks": 0, "loss_rank_avg": 0.5282151103019714, "step": 1785, "valid_targets_mean": 2049.2, "valid_targets_min": 489 }, { "epoch": 2.864, "grad_norm": 0.5478175008230401, "learning_rate": 1.843750076557072e-05, "loss": 0.4351, "loss_nan_ranks": 0, "loss_rank_avg": 0.4625953137874603, "step": 1790, "valid_targets_mean": 4885.2, "valid_targets_min": 506 }, { "epoch": 2.872, "grad_norm": 0.6244527229546376, "learning_rate": 1.832614625590581e-05, "loss": 0.4246, "loss_nan_ranks": 0, "loss_rank_avg": 0.43290358781814575, "step": 1795, "valid_targets_mean": 3414.3, "valid_targets_min": 574 }, { "epoch": 2.88, "grad_norm": 0.8234587446370712, "learning_rate": 1.8214843976899264e-05, "loss": 0.454, "loss_nan_ranks": 0, "loss_rank_avg": 0.4970802664756775, "step": 1800, "valid_targets_mean": 2389.6, "valid_targets_min": 511 }, { "epoch": 2.888, "grad_norm": 0.7474318673230876, "learning_rate": 1.810359740160919e-05, "loss": 0.4377, "loss_nan_ranks": 0, "loss_rank_avg": 0.44770461320877075, "step": 1805, "valid_targets_mean": 2616.4, "valid_targets_min": 629 }, { "epoch": 2.896, "grad_norm": 0.507392374752142, "learning_rate": 1.7992410001355515e-05, "loss": 0.4388, "loss_nan_ranks": 0, "loss_rank_avg": 0.4395235776901245, "step": 1810, "valid_targets_mean": 5488.2, "valid_targets_min": 817 }, { "epoch": 2.904, "grad_norm": 0.7243529623293884, "learning_rate": 1.788128524561168e-05, "loss": 0.4574, "loss_nan_ranks": 0, "loss_rank_avg": 0.5047330260276794, "step": 1815, "valid_targets_mean": 3967.6, "valid_targets_min": 812 }, { "epoch": 2.912, "grad_norm": 0.633168445724496, "learning_rate": 1.7770226601896397e-05, "loss": 0.4324, "loss_nan_ranks": 0, "loss_rank_avg": 0.47217708826065063, "step": 1820, "valid_targets_mean": 3495.6, "valid_targets_min": 767 }, { "epoch": 2.92, "grad_norm": 0.5327318709010629, "learning_rate": 1.7659237535665404e-05, "loss": 0.4572, "loss_nan_ranks": 0, "loss_rank_avg": 0.4306362569332123, "step": 1825, "valid_targets_mean": 4756.9, "valid_targets_min": 754 }, { "epoch": 2.928, "grad_norm": 0.7771946593440502, "learning_rate": 1.754832151020337e-05, "loss": 0.4335, "loss_nan_ranks": 0, "loss_rank_avg": 0.48200926184654236, "step": 1830, "valid_targets_mean": 2481.1, "valid_targets_min": 794 }, { "epoch": 2.936, "grad_norm": 0.7503956631971685, "learning_rate": 1.74374819865158e-05, "loss": 0.435, "loss_nan_ranks": 0, "loss_rank_avg": 0.43350616097450256, "step": 1835, "valid_targets_mean": 2832.5, "valid_targets_min": 751 }, { "epoch": 2.944, "grad_norm": 0.5813547416857596, "learning_rate": 1.7326722423221057e-05, "loss": 0.4394, "loss_nan_ranks": 0, "loss_rank_avg": 0.39921069145202637, "step": 1840, "valid_targets_mean": 3993.1, "valid_targets_min": 597 }, { "epoch": 2.952, "grad_norm": 0.5326305159342818, "learning_rate": 1.7216046276442438e-05, "loss": 0.4184, "loss_nan_ranks": 0, "loss_rank_avg": 0.40970325469970703, "step": 1845, "valid_targets_mean": 5141.3, "valid_targets_min": 868 }, { "epoch": 2.96, "grad_norm": 0.5726519569305819, "learning_rate": 1.7105456999700306e-05, "loss": 0.4379, "loss_nan_ranks": 0, "loss_rank_avg": 0.4494893550872803, "step": 1850, "valid_targets_mean": 4538.5, "valid_targets_min": 1067 }, { "epoch": 2.968, "grad_norm": 0.5532240290014173, "learning_rate": 1.6994958043804374e-05, "loss": 0.4503, "loss_nan_ranks": 0, "loss_rank_avg": 0.41899892687797546, "step": 1855, "valid_targets_mean": 4828.1, "valid_targets_min": 602 }, { "epoch": 2.976, "grad_norm": 0.721900867482457, "learning_rate": 1.6884552856745972e-05, "loss": 0.4625, "loss_nan_ranks": 0, "loss_rank_avg": 0.46213817596435547, "step": 1860, "valid_targets_mean": 2615.0, "valid_targets_min": 288 }, { "epoch": 2.984, "grad_norm": 0.7315015103460375, "learning_rate": 1.6774244883590503e-05, "loss": 0.429, "loss_nan_ranks": 0, "loss_rank_avg": 0.4381354749202728, "step": 1865, "valid_targets_mean": 2520.1, "valid_targets_min": 626 }, { "epoch": 2.992, "grad_norm": 0.5819438726684737, "learning_rate": 1.6664037566369905e-05, "loss": 0.4362, "loss_nan_ranks": 0, "loss_rank_avg": 0.4385232925415039, "step": 1870, "valid_targets_mean": 4100.1, "valid_targets_min": 512 }, { "epoch": 3.0, "grad_norm": 0.6054633776816779, "learning_rate": 1.6553934343975273e-05, "loss": 0.4334, "loss_nan_ranks": 0, "loss_rank_avg": 0.390241801738739, "step": 1875, "valid_targets_mean": 3394.1, "valid_targets_min": 613 }, { "epoch": 3.008, "grad_norm": 0.5137093432064929, "learning_rate": 1.644393865204955e-05, "loss": 0.4403, "loss_nan_ranks": 0, "loss_rank_avg": 0.4124566316604614, "step": 1880, "valid_targets_mean": 4975.0, "valid_targets_min": 279 }, { "epoch": 3.016, "grad_norm": 0.6040955563332363, "learning_rate": 1.6334053922880304e-05, "loss": 0.4166, "loss_nan_ranks": 0, "loss_rank_avg": 0.4080016613006592, "step": 1885, "valid_targets_mean": 4139.9, "valid_targets_min": 1014 }, { "epoch": 3.024, "grad_norm": 0.5643520837040242, "learning_rate": 1.622428358529265e-05, "loss": 0.4374, "loss_nan_ranks": 0, "loss_rank_avg": 0.41588467359542847, "step": 1890, "valid_targets_mean": 4808.7, "valid_targets_min": 837 }, { "epoch": 3.032, "grad_norm": 0.5036209628878098, "learning_rate": 1.611463106454224e-05, "loss": 0.4334, "loss_nan_ranks": 0, "loss_rank_avg": 0.36360183358192444, "step": 1895, "valid_targets_mean": 5222.1, "valid_targets_min": 899 }, { "epoch": 3.04, "grad_norm": 0.643260239413669, "learning_rate": 1.6005099782208392e-05, "loss": 0.4385, "loss_nan_ranks": 0, "loss_rank_avg": 0.4102736711502075, "step": 1900, "valid_targets_mean": 3426.6, "valid_targets_min": 624 }, { "epoch": 3.048, "grad_norm": 0.5073496084996462, "learning_rate": 1.5895693156087317e-05, "loss": 0.4128, "loss_nan_ranks": 0, "loss_rank_avg": 0.41783252358436584, "step": 1905, "valid_targets_mean": 6147.1, "valid_targets_min": 912 }, { "epoch": 3.056, "grad_norm": 0.6943111946599407, "learning_rate": 1.578641460008548e-05, "loss": 0.4402, "loss_nan_ranks": 0, "loss_rank_avg": 0.5079389214515686, "step": 1910, "valid_targets_mean": 3470.3, "valid_targets_min": 508 }, { "epoch": 3.064, "grad_norm": 0.7091661125569685, "learning_rate": 1.5677267524113054e-05, "loss": 0.4107, "loss_nan_ranks": 0, "loss_rank_avg": 0.4140545129776001, "step": 1915, "valid_targets_mean": 3084.1, "valid_targets_min": 626 }, { "epoch": 3.072, "grad_norm": 0.6859858593138877, "learning_rate": 1.5568255333977547e-05, "loss": 0.4185, "loss_nan_ranks": 0, "loss_rank_avg": 0.4497836232185364, "step": 1920, "valid_targets_mean": 3122.9, "valid_targets_min": 664 }, { "epoch": 3.08, "grad_norm": 0.7339049628922232, "learning_rate": 1.5459381431277506e-05, "loss": 0.4082, "loss_nan_ranks": 0, "loss_rank_avg": 0.3879634737968445, "step": 1925, "valid_targets_mean": 2320.3, "valid_targets_min": 447 }, { "epoch": 3.088, "grad_norm": 1.7541410222491485, "learning_rate": 1.5350649213296373e-05, "loss": 0.4249, "loss_nan_ranks": 0, "loss_rank_avg": 0.46820366382598877, "step": 1930, "valid_targets_mean": 3518.1, "valid_targets_min": 646 }, { "epoch": 3.096, "grad_norm": 0.6039401909562907, "learning_rate": 1.5242062072896483e-05, "loss": 0.4204, "loss_nan_ranks": 0, "loss_rank_avg": 0.4237516522407532, "step": 1935, "valid_targets_mean": 4190.0, "valid_targets_min": 884 }, { "epoch": 3.104, "grad_norm": 0.6053990380451738, "learning_rate": 1.5133623398413209e-05, "loss": 0.3991, "loss_nan_ranks": 0, "loss_rank_avg": 0.38883084058761597, "step": 1940, "valid_targets_mean": 3890.1, "valid_targets_min": 621 }, { "epoch": 3.112, "grad_norm": 0.5153567551089437, "learning_rate": 1.50253365735492e-05, "loss": 0.4124, "loss_nan_ranks": 0, "loss_rank_avg": 0.3877636790275574, "step": 1945, "valid_targets_mean": 4963.4, "valid_targets_min": 519 }, { "epoch": 3.12, "grad_norm": 0.7398879295621316, "learning_rate": 1.4917204977268833e-05, "loss": 0.432, "loss_nan_ranks": 0, "loss_rank_avg": 0.4899936318397522, "step": 1950, "valid_targets_mean": 3293.1, "valid_targets_min": 745 }, { "epoch": 3.128, "grad_norm": 0.5271555062833402, "learning_rate": 1.4809231983692733e-05, "loss": 0.4097, "loss_nan_ranks": 0, "loss_rank_avg": 0.37848031520843506, "step": 1955, "valid_targets_mean": 4639.6, "valid_targets_min": 826 }, { "epoch": 3.136, "grad_norm": 0.5550015442785227, "learning_rate": 1.4701420961992533e-05, "loss": 0.424, "loss_nan_ranks": 0, "loss_rank_avg": 0.4084477126598358, "step": 1960, "valid_targets_mean": 4837.2, "valid_targets_min": 556 }, { "epoch": 3.144, "grad_norm": 0.7096847519468681, "learning_rate": 1.459377527628571e-05, "loss": 0.4275, "loss_nan_ranks": 0, "loss_rank_avg": 0.4384467899799347, "step": 1965, "valid_targets_mean": 2920.6, "valid_targets_min": 723 }, { "epoch": 3.152, "grad_norm": 0.7301446675432169, "learning_rate": 1.4486298285530634e-05, "loss": 0.4267, "loss_nan_ranks": 0, "loss_rank_avg": 0.41845598816871643, "step": 1970, "valid_targets_mean": 2488.9, "valid_targets_min": 774 }, { "epoch": 3.16, "grad_norm": 0.675361855978539, "learning_rate": 1.4378993343421736e-05, "loss": 0.4293, "loss_nan_ranks": 0, "loss_rank_avg": 0.41537049412727356, "step": 1975, "valid_targets_mean": 3094.2, "valid_targets_min": 416 }, { "epoch": 3.168, "grad_norm": 0.5632344378219207, "learning_rate": 1.4271863798284877e-05, "loss": 0.4141, "loss_nan_ranks": 0, "loss_rank_avg": 0.4148460030555725, "step": 1980, "valid_targets_mean": 4743.3, "valid_targets_min": 986 }, { "epoch": 3.176, "grad_norm": 0.6386673015303019, "learning_rate": 1.4164912992972846e-05, "loss": 0.4376, "loss_nan_ranks": 0, "loss_rank_avg": 0.4006524682044983, "step": 1985, "valid_targets_mean": 3333.8, "valid_targets_min": 523 }, { "epoch": 3.184, "grad_norm": 0.7341708673500656, "learning_rate": 1.4058144264761087e-05, "loss": 0.4131, "loss_nan_ranks": 0, "loss_rank_avg": 0.4031445384025574, "step": 1990, "valid_targets_mean": 5348.2, "valid_targets_min": 653 }, { "epoch": 3.192, "grad_norm": 0.6108805400299323, "learning_rate": 1.3951560945243517e-05, "loss": 0.4285, "loss_nan_ranks": 0, "loss_rank_avg": 0.39428362250328064, "step": 1995, "valid_targets_mean": 3951.9, "valid_targets_min": 742 }, { "epoch": 3.2, "grad_norm": 0.5744519338390691, "learning_rate": 1.3845166360228597e-05, "loss": 0.4248, "loss_nan_ranks": 0, "loss_rank_avg": 0.40177762508392334, "step": 2000, "valid_targets_mean": 4564.3, "valid_targets_min": 684 }, { "epoch": 3.208, "grad_norm": 0.6863671838454662, "learning_rate": 1.3738963829635559e-05, "loss": 0.4504, "loss_nan_ranks": 0, "loss_rank_avg": 0.46749067306518555, "step": 2005, "valid_targets_mean": 3531.8, "valid_targets_min": 492 }, { "epoch": 3.216, "grad_norm": 0.6671591173606514, "learning_rate": 1.3632956667390784e-05, "loss": 0.407, "loss_nan_ranks": 0, "loss_rank_avg": 0.41678929328918457, "step": 2010, "valid_targets_mean": 3826.6, "valid_targets_min": 561 }, { "epoch": 3.224, "grad_norm": 0.588235248316702, "learning_rate": 1.3527148181324425e-05, "loss": 0.4276, "loss_nan_ranks": 0, "loss_rank_avg": 0.4322141706943512, "step": 2015, "valid_targets_mean": 4300.5, "valid_targets_min": 661 }, { "epoch": 3.232, "grad_norm": 0.5254294071213138, "learning_rate": 1.3421541673067168e-05, "loss": 0.4247, "loss_nan_ranks": 0, "loss_rank_avg": 0.38688722252845764, "step": 2020, "valid_targets_mean": 4773.9, "valid_targets_min": 1061 }, { "epoch": 3.24, "grad_norm": 0.7778220547959938, "learning_rate": 1.3316140437947207e-05, "loss": 0.4248, "loss_nan_ranks": 0, "loss_rank_avg": 0.4182344675064087, "step": 2025, "valid_targets_mean": 4412.4, "valid_targets_min": 572 }, { "epoch": 3.248, "grad_norm": 0.5354024193090587, "learning_rate": 1.321094776488745e-05, "loss": 0.4155, "loss_nan_ranks": 0, "loss_rank_avg": 0.38844114542007446, "step": 2030, "valid_targets_mean": 4513.8, "valid_targets_min": 901 }, { "epoch": 3.2560000000000002, "grad_norm": 0.6307442771592611, "learning_rate": 1.3105966936302856e-05, "loss": 0.4213, "loss_nan_ranks": 0, "loss_rank_avg": 0.3858344554901123, "step": 2035, "valid_targets_mean": 3388.1, "valid_targets_min": 688 }, { "epoch": 3.2640000000000002, "grad_norm": 0.7489956045920521, "learning_rate": 1.3001201227998023e-05, "loss": 0.4295, "loss_nan_ranks": 0, "loss_rank_avg": 0.4188210964202881, "step": 2040, "valid_targets_mean": 2661.7, "valid_targets_min": 659 }, { "epoch": 3.2720000000000002, "grad_norm": 0.650505657663387, "learning_rate": 1.2896653909064964e-05, "loss": 0.4111, "loss_nan_ranks": 0, "loss_rank_avg": 0.4127410650253296, "step": 2045, "valid_targets_mean": 4998.3, "valid_targets_min": 813 }, { "epoch": 3.2800000000000002, "grad_norm": 0.5978107263413696, "learning_rate": 1.2792328241781124e-05, "loss": 0.4016, "loss_nan_ranks": 0, "loss_rank_avg": 0.4069294035434723, "step": 2050, "valid_targets_mean": 4051.4, "valid_targets_min": 744 }, { "epoch": 3.288, "grad_norm": 0.629812123697598, "learning_rate": 1.2688227481507546e-05, "loss": 0.4182, "loss_nan_ranks": 0, "loss_rank_avg": 0.4324589669704437, "step": 2055, "valid_targets_mean": 3744.4, "valid_targets_min": 681 }, { "epoch": 3.296, "grad_norm": 0.5549507988646019, "learning_rate": 1.258435487658733e-05, "loss": 0.4164, "loss_nan_ranks": 0, "loss_rank_avg": 0.40244030952453613, "step": 2060, "valid_targets_mean": 4940.0, "valid_targets_min": 649 }, { "epoch": 3.304, "grad_norm": 0.5869431749420226, "learning_rate": 1.2480713668244243e-05, "loss": 0.4177, "loss_nan_ranks": 0, "loss_rank_avg": 0.41933634877204895, "step": 2065, "valid_targets_mean": 4774.3, "valid_targets_min": 601 }, { "epoch": 3.312, "grad_norm": 0.5560654604661646, "learning_rate": 1.2377307090481586e-05, "loss": 0.4348, "loss_nan_ranks": 0, "loss_rank_avg": 0.40384188294410706, "step": 2070, "valid_targets_mean": 4333.8, "valid_targets_min": 629 }, { "epoch": 3.32, "grad_norm": 0.5258484186346938, "learning_rate": 1.2274138369981298e-05, "loss": 0.4312, "loss_nan_ranks": 0, "loss_rank_avg": 0.41006770730018616, "step": 2075, "valid_targets_mean": 5825.9, "valid_targets_min": 816 }, { "epoch": 3.328, "grad_norm": 0.721798719675094, "learning_rate": 1.2171210726003256e-05, "loss": 0.4107, "loss_nan_ranks": 0, "loss_rank_avg": 0.4058680534362793, "step": 2080, "valid_targets_mean": 3225.6, "valid_targets_min": 823 }, { "epoch": 3.336, "grad_norm": 0.6048643181166588, "learning_rate": 1.2068527370284815e-05, "loss": 0.4163, "loss_nan_ranks": 0, "loss_rank_avg": 0.45129895210266113, "step": 2085, "valid_targets_mean": 3978.1, "valid_targets_min": 534 }, { "epoch": 3.344, "grad_norm": 0.6339488417554544, "learning_rate": 1.1966091506940616e-05, "loss": 0.4184, "loss_nan_ranks": 0, "loss_rank_avg": 0.43796777725219727, "step": 2090, "valid_targets_mean": 3807.9, "valid_targets_min": 753 }, { "epoch": 3.352, "grad_norm": 0.6016289665121101, "learning_rate": 1.1863906332362569e-05, "loss": 0.4001, "loss_nan_ranks": 0, "loss_rank_avg": 0.41488558053970337, "step": 2095, "valid_targets_mean": 3921.8, "valid_targets_min": 687 }, { "epoch": 3.36, "grad_norm": 0.6737156145382773, "learning_rate": 1.176197503512015e-05, "loss": 0.4234, "loss_nan_ranks": 0, "loss_rank_avg": 0.46443474292755127, "step": 2100, "valid_targets_mean": 3559.7, "valid_targets_min": 823 }, { "epoch": 3.368, "grad_norm": 0.6983626223742545, "learning_rate": 1.1660300795860877e-05, "loss": 0.427, "loss_nan_ranks": 0, "loss_rank_avg": 0.432370662689209, "step": 2105, "valid_targets_mean": 3061.8, "valid_targets_min": 769 }, { "epoch": 3.376, "grad_norm": 0.4979082402374389, "learning_rate": 1.1558886787211071e-05, "loss": 0.4313, "loss_nan_ranks": 0, "loss_rank_avg": 0.3811950087547302, "step": 2110, "valid_targets_mean": 5262.1, "valid_targets_min": 892 }, { "epoch": 3.384, "grad_norm": 0.5658616278719996, "learning_rate": 1.1457736173676883e-05, "loss": 0.384, "loss_nan_ranks": 0, "loss_rank_avg": 0.39314180612564087, "step": 2115, "valid_targets_mean": 5609.1, "valid_targets_min": 701 }, { "epoch": 3.392, "grad_norm": 0.5742144020806174, "learning_rate": 1.1356852111545493e-05, "loss": 0.4091, "loss_nan_ranks": 0, "loss_rank_avg": 0.3987872302532196, "step": 2120, "valid_targets_mean": 4422.2, "valid_targets_min": 775 }, { "epoch": 3.4, "grad_norm": 0.5596475137152013, "learning_rate": 1.1256237748786675e-05, "loss": 0.3993, "loss_nan_ranks": 0, "loss_rank_avg": 0.3944924771785736, "step": 2125, "valid_targets_mean": 4539.4, "valid_targets_min": 860 }, { "epoch": 3.408, "grad_norm": 0.6913259682743764, "learning_rate": 1.1155896224954543e-05, "loss": 0.4201, "loss_nan_ranks": 0, "loss_rank_avg": 0.46018218994140625, "step": 2130, "valid_targets_mean": 3704.2, "valid_targets_min": 719 }, { "epoch": 3.416, "grad_norm": 0.5521206700287924, "learning_rate": 1.1055830671089578e-05, "loss": 0.4269, "loss_nan_ranks": 0, "loss_rank_avg": 0.41626229882240295, "step": 2135, "valid_targets_mean": 5602.0, "valid_targets_min": 737 }, { "epoch": 3.424, "grad_norm": 0.626791492401228, "learning_rate": 1.0956044209620966e-05, "loss": 0.4124, "loss_nan_ranks": 0, "loss_rank_avg": 0.435993492603302, "step": 2140, "valid_targets_mean": 3991.2, "valid_targets_min": 496 }, { "epoch": 3.432, "grad_norm": 0.476094171103533, "learning_rate": 1.0856539954269121e-05, "loss": 0.4253, "loss_nan_ranks": 0, "loss_rank_avg": 0.377435564994812, "step": 2145, "valid_targets_mean": 5789.9, "valid_targets_min": 884 }, { "epoch": 3.44, "grad_norm": 0.6128716789365134, "learning_rate": 1.0757321009948543e-05, "loss": 0.419, "loss_nan_ranks": 0, "loss_rank_avg": 0.40004780888557434, "step": 2150, "valid_targets_mean": 3649.6, "valid_targets_min": 495 }, { "epoch": 3.448, "grad_norm": 0.5575841204894988, "learning_rate": 1.0658390472670938e-05, "loss": 0.3902, "loss_nan_ranks": 0, "loss_rank_avg": 0.4247322976589203, "step": 2155, "valid_targets_mean": 6032.9, "valid_targets_min": 985 }, { "epoch": 3.456, "grad_norm": 0.6753891110732698, "learning_rate": 1.0559751429448597e-05, "loss": 0.42, "loss_nan_ranks": 0, "loss_rank_avg": 0.4023727774620056, "step": 2160, "valid_targets_mean": 4760.7, "valid_targets_min": 647 }, { "epoch": 3.464, "grad_norm": 0.6369181074836571, "learning_rate": 1.0461406958198101e-05, "loss": 0.4353, "loss_nan_ranks": 0, "loss_rank_avg": 0.4552972912788391, "step": 2165, "valid_targets_mean": 4412.8, "valid_targets_min": 600 }, { "epoch": 3.472, "grad_norm": 0.4819966107421562, "learning_rate": 1.0363360127644235e-05, "loss": 0.4066, "loss_nan_ranks": 0, "loss_rank_avg": 0.394325315952301, "step": 2170, "valid_targets_mean": 7356.6, "valid_targets_min": 622 }, { "epoch": 3.48, "grad_norm": 0.6128025340406501, "learning_rate": 1.0265613997224255e-05, "loss": 0.4125, "loss_nan_ranks": 0, "loss_rank_avg": 0.3975856900215149, "step": 2175, "valid_targets_mean": 3477.9, "valid_targets_min": 605 }, { "epoch": 3.488, "grad_norm": 0.6384157096000654, "learning_rate": 1.0168171616992422e-05, "loss": 0.4358, "loss_nan_ranks": 0, "loss_rank_avg": 0.4252591133117676, "step": 2180, "valid_targets_mean": 4226.9, "valid_targets_min": 498 }, { "epoch": 3.496, "grad_norm": 0.5767516682589051, "learning_rate": 1.007103602752483e-05, "loss": 0.4353, "loss_nan_ranks": 0, "loss_rank_avg": 0.4446565508842468, "step": 2185, "valid_targets_mean": 4517.8, "valid_targets_min": 576 }, { "epoch": 3.504, "grad_norm": 0.5724798059794518, "learning_rate": 9.974210259824505e-06, "loss": 0.4143, "loss_nan_ranks": 0, "loss_rank_avg": 0.4005405604839325, "step": 2190, "valid_targets_mean": 4404.2, "valid_targets_min": 678 }, { "epoch": 3.512, "grad_norm": 0.5733569057551399, "learning_rate": 9.877697335226872e-06, "loss": 0.4355, "loss_nan_ranks": 0, "loss_rank_avg": 0.4807167053222656, "step": 2195, "valid_targets_mean": 4826.6, "valid_targets_min": 592 }, { "epoch": 3.52, "grad_norm": 1.1802646891503334, "learning_rate": 9.781500265305448e-06, "loss": 0.422, "loss_nan_ranks": 0, "loss_rank_avg": 0.43163758516311646, "step": 2200, "valid_targets_mean": 3795.0, "valid_targets_min": 705 }, { "epoch": 3.528, "grad_norm": 0.8845021260253212, "learning_rate": 9.685622051777856e-06, "loss": 0.4302, "loss_nan_ranks": 0, "loss_rank_avg": 0.46266594529151917, "step": 2205, "valid_targets_mean": 2079.0, "valid_targets_min": 708 }, { "epoch": 3.536, "grad_norm": 0.5646534622070288, "learning_rate": 9.590065686412182e-06, "loss": 0.3905, "loss_nan_ranks": 0, "loss_rank_avg": 0.39034974575042725, "step": 2210, "valid_targets_mean": 4726.2, "valid_targets_min": 376 }, { "epoch": 3.544, "grad_norm": 0.5508012766023374, "learning_rate": 9.494834150933616e-06, "loss": 0.412, "loss_nan_ranks": 0, "loss_rank_avg": 0.42684414982795715, "step": 2215, "valid_targets_mean": 5314.9, "valid_targets_min": 552 }, { "epoch": 3.552, "grad_norm": 0.6966622976087259, "learning_rate": 9.399930416931404e-06, "loss": 0.4038, "loss_nan_ranks": 0, "loss_rank_avg": 0.38812291622161865, "step": 2220, "valid_targets_mean": 4849.1, "valid_targets_min": 860 }, { "epoch": 3.56, "grad_norm": 0.5793443000507197, "learning_rate": 9.30535744576615e-06, "loss": 0.4427, "loss_nan_ranks": 0, "loss_rank_avg": 0.4145427346229553, "step": 2225, "valid_targets_mean": 4751.8, "valid_targets_min": 899 }, { "epoch": 3.568, "grad_norm": 0.5440153611777978, "learning_rate": 9.211118188477362e-06, "loss": 0.4129, "loss_nan_ranks": 0, "loss_rank_avg": 0.42668813467025757, "step": 2230, "valid_targets_mean": 4886.1, "valid_targets_min": 825 }, { "epoch": 3.576, "grad_norm": 0.6942559664808036, "learning_rate": 9.117215585691408e-06, "loss": 0.4318, "loss_nan_ranks": 0, "loss_rank_avg": 0.408440500497818, "step": 2235, "valid_targets_mean": 2879.3, "valid_targets_min": 575 }, { "epoch": 3.584, "grad_norm": 0.5283718370515997, "learning_rate": 9.023652567529744e-06, "loss": 0.4322, "loss_nan_ranks": 0, "loss_rank_avg": 0.39787888526916504, "step": 2240, "valid_targets_mean": 5380.6, "valid_targets_min": 797 }, { "epoch": 3.592, "grad_norm": 0.7189589849266416, "learning_rate": 8.930432053517465e-06, "loss": 0.4315, "loss_nan_ranks": 0, "loss_rank_avg": 0.4863574802875519, "step": 2245, "valid_targets_mean": 3135.4, "valid_targets_min": 598 }, { "epoch": 3.6, "grad_norm": 0.5688319197328309, "learning_rate": 8.837556952492264e-06, "loss": 0.427, "loss_nan_ranks": 0, "loss_rank_avg": 0.443569153547287, "step": 2250, "valid_targets_mean": 4849.6, "valid_targets_min": 491 }, { "epoch": 3.608, "grad_norm": 0.6372972828422979, "learning_rate": 8.745030162513582e-06, "loss": 0.4112, "loss_nan_ranks": 0, "loss_rank_avg": 0.43911778926849365, "step": 2255, "valid_targets_mean": 3977.4, "valid_targets_min": 674 }, { "epoch": 3.616, "grad_norm": 0.646929389349214, "learning_rate": 8.652854570772236e-06, "loss": 0.4331, "loss_nan_ranks": 0, "loss_rank_avg": 0.452898770570755, "step": 2260, "valid_targets_mean": 4541.3, "valid_targets_min": 622 }, { "epoch": 3.624, "grad_norm": 0.536120974383535, "learning_rate": 8.561033053500312e-06, "loss": 0.4012, "loss_nan_ranks": 0, "loss_rank_avg": 0.39861825108528137, "step": 2265, "valid_targets_mean": 5880.2, "valid_targets_min": 680 }, { "epoch": 3.632, "grad_norm": 0.5883785010261273, "learning_rate": 8.46956847588141e-06, "loss": 0.4105, "loss_nan_ranks": 0, "loss_rank_avg": 0.4199955463409424, "step": 2270, "valid_targets_mean": 4599.0, "valid_targets_min": 569 }, { "epoch": 3.64, "grad_norm": 0.43011548249040454, "learning_rate": 8.378463691961237e-06, "loss": 0.4052, "loss_nan_ranks": 0, "loss_rank_avg": 0.3818497955799103, "step": 2275, "valid_targets_mean": 7853.8, "valid_targets_min": 1082 }, { "epoch": 3.648, "grad_norm": 0.7813234320320073, "learning_rate": 8.287721544558574e-06, "loss": 0.423, "loss_nan_ranks": 0, "loss_rank_avg": 0.4120236039161682, "step": 2280, "valid_targets_mean": 2544.3, "valid_targets_min": 777 }, { "epoch": 3.656, "grad_norm": 0.5267738721602246, "learning_rate": 8.197344865176548e-06, "loss": 0.4039, "loss_nan_ranks": 0, "loss_rank_avg": 0.3992099165916443, "step": 2285, "valid_targets_mean": 4824.1, "valid_targets_min": 421 }, { "epoch": 3.664, "grad_norm": 0.5857399421262088, "learning_rate": 8.10733647391427e-06, "loss": 0.4192, "loss_nan_ranks": 0, "loss_rank_avg": 0.3996756076812744, "step": 2290, "valid_targets_mean": 4680.4, "valid_targets_min": 573 }, { "epoch": 3.672, "grad_norm": 0.6229880410561257, "learning_rate": 8.017699179378849e-06, "loss": 0.4102, "loss_nan_ranks": 0, "loss_rank_avg": 0.4350461959838867, "step": 2295, "valid_targets_mean": 4063.1, "valid_targets_min": 535 }, { "epoch": 3.68, "grad_norm": 0.5935713053030977, "learning_rate": 7.928435778597763e-06, "loss": 0.4112, "loss_nan_ranks": 0, "loss_rank_avg": 0.4190603494644165, "step": 2300, "valid_targets_mean": 4264.1, "valid_targets_min": 754 }, { "epoch": 3.6879999999999997, "grad_norm": 0.8421241937446357, "learning_rate": 7.839549056931557e-06, "loss": 0.4345, "loss_nan_ranks": 0, "loss_rank_avg": 0.4648500084877014, "step": 2305, "valid_targets_mean": 2191.2, "valid_targets_min": 812 }, { "epoch": 3.6959999999999997, "grad_norm": 0.627712707242927, "learning_rate": 7.751041787986965e-06, "loss": 0.4379, "loss_nan_ranks": 0, "loss_rank_avg": 0.43756556510925293, "step": 2310, "valid_targets_mean": 3952.6, "valid_targets_min": 644 }, { "epoch": 3.7039999999999997, "grad_norm": 0.55692187150058, "learning_rate": 7.662916733530317e-06, "loss": 0.4214, "loss_nan_ranks": 0, "loss_rank_avg": 0.410952627658844, "step": 2315, "valid_targets_mean": 5188.4, "valid_targets_min": 1013 }, { "epoch": 3.7119999999999997, "grad_norm": 0.5841435249769223, "learning_rate": 7.575176643401394e-06, "loss": 0.3974, "loss_nan_ranks": 0, "loss_rank_avg": 0.39622873067855835, "step": 2320, "valid_targets_mean": 4232.2, "valid_targets_min": 691 }, { "epoch": 3.7199999999999998, "grad_norm": 0.6334245660960619, "learning_rate": 7.487824255427616e-06, "loss": 0.4123, "loss_nan_ranks": 0, "loss_rank_avg": 0.43315714597702026, "step": 2325, "valid_targets_mean": 3952.1, "valid_targets_min": 679 }, { "epoch": 3.7279999999999998, "grad_norm": 0.7125490272965322, "learning_rate": 7.400862295338595e-06, "loss": 0.4025, "loss_nan_ranks": 0, "loss_rank_avg": 0.3960513770580292, "step": 2330, "valid_targets_mean": 2979.1, "valid_targets_min": 568 }, { "epoch": 3.7359999999999998, "grad_norm": 0.7047871741832065, "learning_rate": 7.314293476681122e-06, "loss": 0.4278, "loss_nan_ranks": 0, "loss_rank_avg": 0.4047684669494629, "step": 2335, "valid_targets_mean": 2885.2, "valid_targets_min": 769 }, { "epoch": 3.7439999999999998, "grad_norm": 0.5235697909405775, "learning_rate": 7.228120500734443e-06, "loss": 0.4066, "loss_nan_ranks": 0, "loss_rank_avg": 0.39039039611816406, "step": 2340, "valid_targets_mean": 5031.9, "valid_targets_min": 693 }, { "epoch": 3.752, "grad_norm": 0.6309610439555812, "learning_rate": 7.1423460564259995e-06, "loss": 0.425, "loss_nan_ranks": 0, "loss_rank_avg": 0.41119787096977234, "step": 2345, "valid_targets_mean": 3696.8, "valid_targets_min": 615 }, { "epoch": 3.76, "grad_norm": 0.6046248254536285, "learning_rate": 7.056972820247516e-06, "loss": 0.4312, "loss_nan_ranks": 0, "loss_rank_avg": 0.44950827956199646, "step": 2350, "valid_targets_mean": 4173.8, "valid_targets_min": 468 }, { "epoch": 3.768, "grad_norm": 0.5813802818171795, "learning_rate": 6.97200345617149e-06, "loss": 0.4251, "loss_nan_ranks": 0, "loss_rank_avg": 0.4253307580947876, "step": 2355, "valid_targets_mean": 4188.0, "valid_targets_min": 906 }, { "epoch": 3.776, "grad_norm": 0.6480857251816284, "learning_rate": 6.887440615568044e-06, "loss": 0.4063, "loss_nan_ranks": 0, "loss_rank_avg": 0.3995557427406311, "step": 2360, "valid_targets_mean": 3368.9, "valid_targets_min": 696 }, { "epoch": 3.784, "grad_norm": 0.5978641746105108, "learning_rate": 6.803286937122233e-06, "loss": 0.4212, "loss_nan_ranks": 0, "loss_rank_avg": 0.4335462749004364, "step": 2365, "valid_targets_mean": 4294.8, "valid_targets_min": 717 }, { "epoch": 3.792, "grad_norm": 0.705354339864555, "learning_rate": 6.719545046751674e-06, "loss": 0.4492, "loss_nan_ranks": 0, "loss_rank_avg": 0.4486519694328308, "step": 2370, "valid_targets_mean": 3097.3, "valid_targets_min": 667 }, { "epoch": 3.8, "grad_norm": 0.6447197729667318, "learning_rate": 6.636217557524605e-06, "loss": 0.4089, "loss_nan_ranks": 0, "loss_rank_avg": 0.4317939281463623, "step": 2375, "valid_targets_mean": 3642.6, "valid_targets_min": 738 }, { "epoch": 3.808, "grad_norm": 0.5784840504063279, "learning_rate": 6.55330706957837e-06, "loss": 0.4183, "loss_nan_ranks": 0, "loss_rank_avg": 0.417401522397995, "step": 2380, "valid_targets_mean": 4374.6, "valid_targets_min": 1143 }, { "epoch": 3.816, "grad_norm": 0.5691858028003152, "learning_rate": 6.4708161700382655e-06, "loss": 0.4185, "loss_nan_ranks": 0, "loss_rank_avg": 0.4285266101360321, "step": 2385, "valid_targets_mean": 4893.5, "valid_targets_min": 527 }, { "epoch": 3.824, "grad_norm": 0.6509918903676563, "learning_rate": 6.388747432936819e-06, "loss": 0.3843, "loss_nan_ranks": 0, "loss_rank_avg": 0.39279234409332275, "step": 2390, "valid_targets_mean": 3421.9, "valid_targets_min": 766 }, { "epoch": 3.832, "grad_norm": 0.5477195713050954, "learning_rate": 6.3071034191334915e-06, "loss": 0.3963, "loss_nan_ranks": 0, "loss_rank_avg": 0.4045974612236023, "step": 2395, "valid_targets_mean": 4609.2, "valid_targets_min": 868 }, { "epoch": 3.84, "grad_norm": 0.6280025691309644, "learning_rate": 6.22588667623472e-06, "loss": 0.4238, "loss_nan_ranks": 0, "loss_rank_avg": 0.4826744794845581, "step": 2400, "valid_targets_mean": 4032.0, "valid_targets_min": 473 }, { "epoch": 3.848, "grad_norm": 0.7680355117957194, "learning_rate": 6.145099738514466e-06, "loss": 0.4578, "loss_nan_ranks": 0, "loss_rank_avg": 0.4926620125770569, "step": 2405, "valid_targets_mean": 2725.4, "valid_targets_min": 615 }, { "epoch": 3.856, "grad_norm": 0.5711966605379848, "learning_rate": 6.064745126835112e-06, "loss": 0.4111, "loss_nan_ranks": 0, "loss_rank_avg": 0.4238035976886749, "step": 2410, "valid_targets_mean": 4564.0, "valid_targets_min": 679 }, { "epoch": 3.864, "grad_norm": 0.5326749978154176, "learning_rate": 5.984825348568812e-06, "loss": 0.4065, "loss_nan_ranks": 0, "loss_rank_avg": 0.4256637394428253, "step": 2415, "valid_targets_mean": 5087.4, "valid_targets_min": 659 }, { "epoch": 3.872, "grad_norm": 0.7369604952522139, "learning_rate": 5.905342897519262e-06, "loss": 0.405, "loss_nan_ranks": 0, "loss_rank_avg": 0.47101157903671265, "step": 2420, "valid_targets_mean": 2868.4, "valid_targets_min": 558 }, { "epoch": 3.88, "grad_norm": 0.7010288266967857, "learning_rate": 5.826300253843851e-06, "loss": 0.421, "loss_nan_ranks": 0, "loss_rank_avg": 0.3836793005466461, "step": 2425, "valid_targets_mean": 2784.3, "valid_targets_min": 575 }, { "epoch": 3.888, "grad_norm": 0.6824318371192227, "learning_rate": 5.7476998839763035e-06, "loss": 0.4208, "loss_nan_ranks": 0, "loss_rank_avg": 0.4819521903991699, "step": 2430, "valid_targets_mean": 3219.2, "valid_targets_min": 660 }, { "epoch": 3.896, "grad_norm": 0.5989642635294622, "learning_rate": 5.669544240549698e-06, "loss": 0.3991, "loss_nan_ranks": 0, "loss_rank_avg": 0.40206092596054077, "step": 2435, "valid_targets_mean": 3922.7, "valid_targets_min": 749 }, { "epoch": 3.904, "grad_norm": 0.5819637834251199, "learning_rate": 5.591835762319946e-06, "loss": 0.43, "loss_nan_ranks": 0, "loss_rank_avg": 0.4356538653373718, "step": 2440, "valid_targets_mean": 4358.2, "valid_targets_min": 989 }, { "epoch": 3.912, "grad_norm": 0.8093003590748824, "learning_rate": 5.514576874089683e-06, "loss": 0.4171, "loss_nan_ranks": 0, "loss_rank_avg": 0.4149336814880371, "step": 2445, "valid_targets_mean": 3634.3, "valid_targets_min": 761 }, { "epoch": 3.92, "grad_norm": 0.5637333714042546, "learning_rate": 5.437769986632622e-06, "loss": 0.4192, "loss_nan_ranks": 0, "loss_rank_avg": 0.40535950660705566, "step": 2450, "valid_targets_mean": 5450.6, "valid_targets_min": 842 }, { "epoch": 3.928, "grad_norm": 0.5650303402408413, "learning_rate": 5.361417496618315e-06, "loss": 0.4098, "loss_nan_ranks": 0, "loss_rank_avg": 0.4089083969593048, "step": 2455, "valid_targets_mean": 4491.1, "valid_targets_min": 402 }, { "epoch": 3.936, "grad_norm": 0.5862251884278199, "learning_rate": 5.285521786537368e-06, "loss": 0.4296, "loss_nan_ranks": 0, "loss_rank_avg": 0.421395480632782, "step": 2460, "valid_targets_mean": 4045.0, "valid_targets_min": 499 }, { "epoch": 3.944, "grad_norm": 0.5911774172256168, "learning_rate": 5.2100852246270975e-06, "loss": 0.4443, "loss_nan_ranks": 0, "loss_rank_avg": 0.41850167512893677, "step": 2465, "valid_targets_mean": 4103.7, "valid_targets_min": 1182 }, { "epoch": 3.952, "grad_norm": 0.5488515492585119, "learning_rate": 5.135110164797637e-06, "loss": 0.4105, "loss_nan_ranks": 0, "loss_rank_avg": 0.4004397988319397, "step": 2470, "valid_targets_mean": 4585.8, "valid_targets_min": 741 }, { "epoch": 3.96, "grad_norm": 0.638564502319262, "learning_rate": 5.060598946558484e-06, "loss": 0.4295, "loss_nan_ranks": 0, "loss_rank_avg": 0.43971341848373413, "step": 2475, "valid_targets_mean": 3582.0, "valid_targets_min": 847 }, { "epoch": 3.968, "grad_norm": 0.5896713751876114, "learning_rate": 4.986553894945512e-06, "loss": 0.3939, "loss_nan_ranks": 0, "loss_rank_avg": 0.3950134515762329, "step": 2480, "valid_targets_mean": 3773.5, "valid_targets_min": 801 }, { "epoch": 3.976, "grad_norm": 0.7928195363821494, "learning_rate": 4.912977320448391e-06, "loss": 0.4187, "loss_nan_ranks": 0, "loss_rank_avg": 0.3833604156970978, "step": 2485, "valid_targets_mean": 3785.5, "valid_targets_min": 473 }, { "epoch": 3.984, "grad_norm": 0.59139939976017, "learning_rate": 4.839871518938513e-06, "loss": 0.4383, "loss_nan_ranks": 0, "loss_rank_avg": 0.4159795343875885, "step": 2490, "valid_targets_mean": 4217.4, "valid_targets_min": 663 }, { "epoch": 3.992, "grad_norm": 0.550470299025946, "learning_rate": 4.767238771597347e-06, "loss": 0.3958, "loss_nan_ranks": 0, "loss_rank_avg": 0.3765171766281128, "step": 2495, "valid_targets_mean": 5407.9, "valid_targets_min": 1275 }, { "epoch": 4.0, "grad_norm": 0.6285536655979354, "learning_rate": 4.695081344845254e-06, "loss": 0.4467, "loss_nan_ranks": 0, "loss_rank_avg": 0.4390712380409241, "step": 2500, "valid_targets_mean": 3904.6, "valid_targets_min": 767 }, { "epoch": 4.008, "grad_norm": 0.6426613600523003, "learning_rate": 4.623401490270778e-06, "loss": 0.4065, "loss_nan_ranks": 0, "loss_rank_avg": 0.4202343821525574, "step": 2505, "valid_targets_mean": 3585.8, "valid_targets_min": 631 }, { "epoch": 4.016, "grad_norm": 0.5585933431067611, "learning_rate": 4.552201444560373e-06, "loss": 0.4059, "loss_nan_ranks": 0, "loss_rank_avg": 0.4154893457889557, "step": 2510, "valid_targets_mean": 4587.4, "valid_targets_min": 834 }, { "epoch": 4.024, "grad_norm": 0.5849602135579992, "learning_rate": 4.481483429428615e-06, "loss": 0.4119, "loss_nan_ranks": 0, "loss_rank_avg": 0.3984353244304657, "step": 2515, "valid_targets_mean": 4275.1, "valid_targets_min": 657 }, { "epoch": 4.032, "grad_norm": 0.5834357553067363, "learning_rate": 4.4112496515488765e-06, "loss": 0.4011, "loss_nan_ranks": 0, "loss_rank_avg": 0.3702242970466614, "step": 2520, "valid_targets_mean": 4599.9, "valid_targets_min": 608 }, { "epoch": 4.04, "grad_norm": 0.7637639117096291, "learning_rate": 4.341502302484472e-06, "loss": 0.425, "loss_nan_ranks": 0, "loss_rank_avg": 0.4776972532272339, "step": 2525, "valid_targets_mean": 2890.7, "valid_targets_min": 695 }, { "epoch": 4.048, "grad_norm": 0.7468745784687151, "learning_rate": 4.272243558620264e-06, "loss": 0.4098, "loss_nan_ranks": 0, "loss_rank_avg": 0.4523777961730957, "step": 2530, "valid_targets_mean": 2911.0, "valid_targets_min": 333 }, { "epoch": 4.056, "grad_norm": 0.5802649714021575, "learning_rate": 4.203475581094771e-06, "loss": 0.4373, "loss_nan_ranks": 0, "loss_rank_avg": 0.425601601600647, "step": 2535, "valid_targets_mean": 5046.2, "valid_targets_min": 973 }, { "epoch": 4.064, "grad_norm": 0.5447288932891161, "learning_rate": 4.135200515732716e-06, "loss": 0.4005, "loss_nan_ranks": 0, "loss_rank_avg": 0.3529777526855469, "step": 2540, "valid_targets_mean": 4301.4, "valid_targets_min": 653 }, { "epoch": 4.072, "grad_norm": 0.7333561005546733, "learning_rate": 4.067420492978065e-06, "loss": 0.406, "loss_nan_ranks": 0, "loss_rank_avg": 0.4183364808559418, "step": 2545, "valid_targets_mean": 2979.5, "valid_targets_min": 433 }, { "epoch": 4.08, "grad_norm": 0.8448397467709902, "learning_rate": 4.000137627827554e-06, "loss": 0.411, "loss_nan_ranks": 0, "loss_rank_avg": 0.4083864092826843, "step": 2550, "valid_targets_mean": 1975.8, "valid_targets_min": 737 }, { "epoch": 4.088, "grad_norm": 0.7039063639446964, "learning_rate": 3.9333540197647035e-06, "loss": 0.3973, "loss_nan_ranks": 0, "loss_rank_avg": 0.42661112546920776, "step": 2555, "valid_targets_mean": 2891.6, "valid_targets_min": 744 }, { "epoch": 4.096, "grad_norm": 0.6103368310567368, "learning_rate": 3.867071752694282e-06, "loss": 0.4136, "loss_nan_ranks": 0, "loss_rank_avg": 0.39338967204093933, "step": 2560, "valid_targets_mean": 4202.1, "valid_targets_min": 619 }, { "epoch": 4.104, "grad_norm": 0.5609255390428625, "learning_rate": 3.8012928948773243e-06, "loss": 0.4109, "loss_nan_ranks": 0, "loss_rank_avg": 0.3590124249458313, "step": 2565, "valid_targets_mean": 4673.2, "valid_targets_min": 534 }, { "epoch": 4.112, "grad_norm": 0.7163062681286491, "learning_rate": 3.7360194988665364e-06, "loss": 0.4282, "loss_nan_ranks": 0, "loss_rank_avg": 0.4255579113960266, "step": 2570, "valid_targets_mean": 2981.8, "valid_targets_min": 753 }, { "epoch": 4.12, "grad_norm": 0.4784051484306757, "learning_rate": 3.6712536014422885e-06, "loss": 0.405, "loss_nan_ranks": 0, "loss_rank_avg": 0.350371778011322, "step": 2575, "valid_targets_mean": 6456.9, "valid_targets_min": 503 }, { "epoch": 4.128, "grad_norm": 0.5438453496849911, "learning_rate": 3.606997223549049e-06, "loss": 0.4131, "loss_nan_ranks": 0, "loss_rank_avg": 0.4073692262172699, "step": 2580, "valid_targets_mean": 5279.0, "valid_targets_min": 801 }, { "epoch": 4.136, "grad_norm": 0.5879969117381249, "learning_rate": 3.543252370232313e-06, "loss": 0.3969, "loss_nan_ranks": 0, "loss_rank_avg": 0.4108046889305115, "step": 2585, "valid_targets_mean": 4720.8, "valid_targets_min": 959 }, { "epoch": 4.144, "grad_norm": 0.5532047673911502, "learning_rate": 3.4800210305760662e-06, "loss": 0.4201, "loss_nan_ranks": 0, "loss_rank_avg": 0.36657533049583435, "step": 2590, "valid_targets_mean": 4700.9, "valid_targets_min": 601 }, { "epoch": 4.152, "grad_norm": 0.5901167864484412, "learning_rate": 3.4173051776406817e-06, "loss": 0.4184, "loss_nan_ranks": 0, "loss_rank_avg": 0.4340725541114807, "step": 2595, "valid_targets_mean": 4326.9, "valid_targets_min": 523 }, { "epoch": 4.16, "grad_norm": 0.5475674178491631, "learning_rate": 3.3551067684013706e-06, "loss": 0.4084, "loss_nan_ranks": 0, "loss_rank_avg": 0.3945809006690979, "step": 2600, "valid_targets_mean": 4706.6, "valid_targets_min": 851 }, { "epoch": 4.168, "grad_norm": 0.5801898131217752, "learning_rate": 3.2934277436871187e-06, "loss": 0.3873, "loss_nan_ranks": 0, "loss_rank_avg": 0.3601340055465698, "step": 2605, "valid_targets_mean": 4189.4, "valid_targets_min": 534 }, { "epoch": 4.176, "grad_norm": 0.7299558918948404, "learning_rate": 3.232270028120121e-06, "loss": 0.405, "loss_nan_ranks": 0, "loss_rank_avg": 0.4364742338657379, "step": 2610, "valid_targets_mean": 2916.7, "valid_targets_min": 703 }, { "epoch": 4.184, "grad_norm": 0.5996399283446768, "learning_rate": 3.1716355300557256e-06, "loss": 0.3937, "loss_nan_ranks": 0, "loss_rank_avg": 0.4116308391094208, "step": 2615, "valid_targets_mean": 4109.5, "valid_targets_min": 600 }, { "epoch": 4.192, "grad_norm": 0.5730701668848656, "learning_rate": 3.111526141522896e-06, "loss": 0.4212, "loss_nan_ranks": 0, "loss_rank_avg": 0.41376417875289917, "step": 2620, "valid_targets_mean": 4810.4, "valid_targets_min": 691 }, { "epoch": 4.2, "grad_norm": 0.5519404941783179, "learning_rate": 3.0519437381651507e-06, "loss": 0.4072, "loss_nan_ranks": 0, "loss_rank_avg": 0.40336206555366516, "step": 2625, "valid_targets_mean": 4866.9, "valid_targets_min": 749 }, { "epoch": 4.208, "grad_norm": 0.5067010081942247, "learning_rate": 2.992890179182062e-06, "loss": 0.3919, "loss_nan_ranks": 0, "loss_rank_avg": 0.35615670680999756, "step": 2630, "valid_targets_mean": 5492.4, "valid_targets_min": 631 }, { "epoch": 4.216, "grad_norm": 0.6359749406389089, "learning_rate": 2.93436730727122e-06, "loss": 0.4067, "loss_nan_ranks": 0, "loss_rank_avg": 0.402889609336853, "step": 2635, "valid_targets_mean": 3929.4, "valid_targets_min": 912 }, { "epoch": 4.224, "grad_norm": 0.6145949359641839, "learning_rate": 2.8763769485707447e-06, "loss": 0.3904, "loss_nan_ranks": 0, "loss_rank_avg": 0.40841296315193176, "step": 2640, "valid_targets_mean": 4276.6, "valid_targets_min": 799 }, { "epoch": 4.232, "grad_norm": 0.7110071687256778, "learning_rate": 2.818920912602294e-06, "loss": 0.405, "loss_nan_ranks": 0, "loss_rank_avg": 0.4124786853790283, "step": 2645, "valid_targets_mean": 2939.0, "valid_targets_min": 788 }, { "epoch": 4.24, "grad_norm": 0.5743817768129006, "learning_rate": 2.762000992214626e-06, "loss": 0.4164, "loss_nan_ranks": 0, "loss_rank_avg": 0.3974683880805969, "step": 2650, "valid_targets_mean": 4447.1, "valid_targets_min": 511 }, { "epoch": 4.248, "grad_norm": 0.5413226643387624, "learning_rate": 2.7056189635276162e-06, "loss": 0.3864, "loss_nan_ranks": 0, "loss_rank_avg": 0.3866695463657379, "step": 2655, "valid_targets_mean": 5034.9, "valid_targets_min": 677 }, { "epoch": 4.256, "grad_norm": 0.5726347506605515, "learning_rate": 2.6497765858768643e-06, "loss": 0.4024, "loss_nan_ranks": 0, "loss_rank_avg": 0.42359739542007446, "step": 2660, "valid_targets_mean": 4637.1, "valid_targets_min": 635 }, { "epoch": 4.264, "grad_norm": 0.4855797229728632, "learning_rate": 2.594475601758786e-06, "loss": 0.3905, "loss_nan_ranks": 0, "loss_rank_avg": 0.3630528748035431, "step": 2665, "valid_targets_mean": 5871.8, "valid_targets_min": 845 }, { "epoch": 4.272, "grad_norm": 0.6421484551855371, "learning_rate": 2.539717736776237e-06, "loss": 0.3901, "loss_nan_ranks": 0, "loss_rank_avg": 0.42318665981292725, "step": 2670, "valid_targets_mean": 4278.5, "valid_targets_min": 624 }, { "epoch": 4.28, "grad_norm": 0.6235765181112626, "learning_rate": 2.4855046995846844e-06, "loss": 0.3848, "loss_nan_ranks": 0, "loss_rank_avg": 0.4206055700778961, "step": 2675, "valid_targets_mean": 4279.4, "valid_targets_min": 484 }, { "epoch": 4.288, "grad_norm": 0.5687422916455531, "learning_rate": 2.431838181838868e-06, "loss": 0.4003, "loss_nan_ranks": 0, "loss_rank_avg": 0.38011544942855835, "step": 2680, "valid_targets_mean": 4600.6, "valid_targets_min": 931 }, { "epoch": 4.296, "grad_norm": 0.5597281309268304, "learning_rate": 2.3787198581400285e-06, "loss": 0.409, "loss_nan_ranks": 0, "loss_rank_avg": 0.3837645649909973, "step": 2685, "valid_targets_mean": 4807.3, "valid_targets_min": 852 }, { "epoch": 4.304, "grad_norm": 0.5208997470892812, "learning_rate": 2.3261513859836437e-06, "loss": 0.3891, "loss_nan_ranks": 0, "loss_rank_avg": 0.36763709783554077, "step": 2690, "valid_targets_mean": 5476.7, "valid_targets_min": 713 }, { "epoch": 4.312, "grad_norm": 0.729596593306008, "learning_rate": 2.27413440570772e-06, "loss": 0.4192, "loss_nan_ranks": 0, "loss_rank_avg": 0.47866132855415344, "step": 2695, "valid_targets_mean": 3093.9, "valid_targets_min": 779 }, { "epoch": 4.32, "grad_norm": 0.5963682532410031, "learning_rate": 2.222670540441596e-06, "loss": 0.3944, "loss_nan_ranks": 0, "loss_rank_avg": 0.35257434844970703, "step": 2700, "valid_targets_mean": 5420.8, "valid_targets_min": 776 }, { "epoch": 4.328, "grad_norm": 0.5229520235670004, "learning_rate": 2.17176139605531e-06, "loss": 0.4251, "loss_nan_ranks": 0, "loss_rank_avg": 0.3816877603530884, "step": 2705, "valid_targets_mean": 5403.9, "valid_targets_min": 725 }, { "epoch": 4.336, "grad_norm": 0.6555810725268437, "learning_rate": 2.121408561109466e-06, "loss": 0.4173, "loss_nan_ranks": 0, "loss_rank_avg": 0.38497596979141235, "step": 2710, "valid_targets_mean": 3670.6, "valid_targets_min": 734 }, { "epoch": 4.344, "grad_norm": 0.5360008671439541, "learning_rate": 2.071613606805696e-06, "loss": 0.4179, "loss_nan_ranks": 0, "loss_rank_avg": 0.4150103032588959, "step": 2715, "valid_targets_mean": 5754.9, "valid_targets_min": 1023 }, { "epoch": 4.352, "grad_norm": 0.5212551398201117, "learning_rate": 2.0223780869376018e-06, "loss": 0.394, "loss_nan_ranks": 0, "loss_rank_avg": 0.40325045585632324, "step": 2720, "valid_targets_mean": 5807.8, "valid_targets_min": 691 }, { "epoch": 4.36, "grad_norm": 0.501570979642165, "learning_rate": 1.9737035378422907e-06, "loss": 0.3989, "loss_nan_ranks": 0, "loss_rank_avg": 0.38142967224121094, "step": 2725, "valid_targets_mean": 5602.2, "valid_targets_min": 696 }, { "epoch": 4.368, "grad_norm": 0.6586918366767852, "learning_rate": 1.925591478352424e-06, "loss": 0.3876, "loss_nan_ranks": 0, "loss_rank_avg": 0.3896998167037964, "step": 2730, "valid_targets_mean": 3427.3, "valid_targets_min": 671 }, { "epoch": 4.376, "grad_norm": 0.5558539980698498, "learning_rate": 1.8780434097488443e-06, "loss": 0.4082, "loss_nan_ranks": 0, "loss_rank_avg": 0.4040504992008209, "step": 2735, "valid_targets_mean": 4714.9, "valid_targets_min": 695 }, { "epoch": 4.384, "grad_norm": 0.7867324950433691, "learning_rate": 1.831060815713699e-06, "loss": 0.4048, "loss_nan_ranks": 0, "loss_rank_avg": 0.4829801917076111, "step": 2740, "valid_targets_mean": 2988.8, "valid_targets_min": 288 }, { "epoch": 4.392, "grad_norm": 0.6214041417079301, "learning_rate": 1.7846451622841643e-06, "loss": 0.4194, "loss_nan_ranks": 0, "loss_rank_avg": 0.3955761194229126, "step": 2745, "valid_targets_mean": 3676.2, "valid_targets_min": 489 }, { "epoch": 4.4, "grad_norm": 0.5561873136372868, "learning_rate": 1.7387978978066988e-06, "loss": 0.4089, "loss_nan_ranks": 0, "loss_rank_avg": 0.3890697956085205, "step": 2750, "valid_targets_mean": 5758.4, "valid_targets_min": 882 }, { "epoch": 4.408, "grad_norm": 0.560721929365539, "learning_rate": 1.6935204528918347e-06, "loss": 0.3973, "loss_nan_ranks": 0, "loss_rank_avg": 0.40641331672668457, "step": 2755, "valid_targets_mean": 5017.4, "valid_targets_min": 521 }, { "epoch": 4.416, "grad_norm": 0.6889136427973638, "learning_rate": 1.6488142403695651e-06, "loss": 0.4157, "loss_nan_ranks": 0, "loss_rank_avg": 0.4421613812446594, "step": 2760, "valid_targets_mean": 3295.3, "valid_targets_min": 709 }, { "epoch": 4.424, "grad_norm": 0.7902577945988017, "learning_rate": 1.6046806552452254e-06, "loss": 0.4283, "loss_nan_ranks": 0, "loss_rank_avg": 0.4924136698246002, "step": 2765, "valid_targets_mean": 3308.9, "valid_targets_min": 751 }, { "epoch": 4.432, "grad_norm": 0.6844284345573052, "learning_rate": 1.5611210746559868e-06, "loss": 0.3938, "loss_nan_ranks": 0, "loss_rank_avg": 0.40334612131118774, "step": 2770, "valid_targets_mean": 3122.0, "valid_targets_min": 583 }, { "epoch": 4.44, "grad_norm": 0.6162273196275045, "learning_rate": 1.5181368578278744e-06, "loss": 0.4041, "loss_nan_ranks": 0, "loss_rank_avg": 0.35837671160697937, "step": 2775, "valid_targets_mean": 3665.4, "valid_targets_min": 718 }, { "epoch": 4.448, "grad_norm": 0.5974789854549655, "learning_rate": 1.4757293460333566e-06, "loss": 0.4308, "loss_nan_ranks": 0, "loss_rank_avg": 0.4109255075454712, "step": 2780, "valid_targets_mean": 4179.9, "valid_targets_min": 793 }, { "epoch": 4.456, "grad_norm": 0.5708600383249177, "learning_rate": 1.4338998625494905e-06, "loss": 0.4075, "loss_nan_ranks": 0, "loss_rank_avg": 0.3754119277000427, "step": 2785, "valid_targets_mean": 4436.0, "valid_targets_min": 773 }, { "epoch": 4.464, "grad_norm": 0.6313362954432985, "learning_rate": 1.3926497126166405e-06, "loss": 0.4204, "loss_nan_ranks": 0, "loss_rank_avg": 0.41806894540786743, "step": 2790, "valid_targets_mean": 4132.2, "valid_targets_min": 624 }, { "epoch": 4.4719999999999995, "grad_norm": 0.5994471438843727, "learning_rate": 1.3519801833977298e-06, "loss": 0.4271, "loss_nan_ranks": 0, "loss_rank_avg": 0.43274080753326416, "step": 2795, "valid_targets_mean": 4554.3, "valid_targets_min": 371 }, { "epoch": 4.48, "grad_norm": 0.5410383078069514, "learning_rate": 1.3118925439381003e-06, "loss": 0.4073, "loss_nan_ranks": 0, "loss_rank_avg": 0.35545891523361206, "step": 2800, "valid_targets_mean": 5008.1, "valid_targets_min": 563 }, { "epoch": 4.4879999999999995, "grad_norm": 0.7142880244968844, "learning_rate": 1.2723880451258918e-06, "loss": 0.4072, "loss_nan_ranks": 0, "loss_rank_avg": 0.4319564998149872, "step": 2805, "valid_targets_mean": 3327.8, "valid_targets_min": 717 }, { "epoch": 4.496, "grad_norm": 0.5901784201564468, "learning_rate": 1.2334679196530219e-06, "loss": 0.4332, "loss_nan_ranks": 0, "loss_rank_avg": 0.42878252267837524, "step": 2810, "valid_targets_mean": 4661.2, "valid_targets_min": 669 }, { "epoch": 4.504, "grad_norm": 0.5840631068232639, "learning_rate": 1.1951333819767163e-06, "loss": 0.4096, "loss_nan_ranks": 0, "loss_rank_avg": 0.392315149307251, "step": 2815, "valid_targets_mean": 4338.4, "valid_targets_min": 823 }, { "epoch": 4.5120000000000005, "grad_norm": 0.9670608326476431, "learning_rate": 1.157385628281622e-06, "loss": 0.4121, "loss_nan_ranks": 0, "loss_rank_avg": 0.409821093082428, "step": 2820, "valid_targets_mean": 2009.2, "valid_targets_min": 725 }, { "epoch": 4.52, "grad_norm": 0.6605390936042161, "learning_rate": 1.1202258364424633e-06, "loss": 0.4025, "loss_nan_ranks": 0, "loss_rank_avg": 0.41265183687210083, "step": 2825, "valid_targets_mean": 3653.6, "valid_targets_min": 676 }, { "epoch": 4.5280000000000005, "grad_norm": 0.512314838581097, "learning_rate": 1.0836551659873073e-06, "loss": 0.4052, "loss_nan_ranks": 0, "loss_rank_avg": 0.3962034583091736, "step": 2830, "valid_targets_mean": 5787.9, "valid_targets_min": 930 }, { "epoch": 4.536, "grad_norm": 0.5526075598864743, "learning_rate": 1.0476747580613723e-06, "loss": 0.4239, "loss_nan_ranks": 0, "loss_rank_avg": 0.37799936532974243, "step": 2835, "valid_targets_mean": 4742.4, "valid_targets_min": 616 }, { "epoch": 4.5440000000000005, "grad_norm": 0.5423110924774888, "learning_rate": 1.012285735391416e-06, "loss": 0.4318, "loss_nan_ranks": 0, "loss_rank_avg": 0.41332849860191345, "step": 2840, "valid_targets_mean": 5307.6, "valid_targets_min": 598 }, { "epoch": 4.552, "grad_norm": 0.611737571874644, "learning_rate": 9.774892022507166e-07, "loss": 0.4062, "loss_nan_ranks": 0, "loss_rank_avg": 0.3971940875053406, "step": 2845, "valid_targets_mean": 4247.7, "valid_targets_min": 771 }, { "epoch": 4.5600000000000005, "grad_norm": 0.7135757278290046, "learning_rate": 9.432862444245994e-07, "loss": 0.4359, "loss_nan_ranks": 0, "loss_rank_avg": 0.41892296075820923, "step": 2850, "valid_targets_mean": 3975.4, "valid_targets_min": 789 }, { "epoch": 4.568, "grad_norm": 0.7473593705860603, "learning_rate": 9.096779291765667e-07, "loss": 0.4205, "loss_nan_ranks": 0, "loss_rank_avg": 0.4587504267692566, "step": 2855, "valid_targets_mean": 2893.6, "valid_targets_min": 813 }, { "epoch": 4.576, "grad_norm": 0.6873974964018028, "learning_rate": 8.766653052149831e-07, "loss": 0.4326, "loss_nan_ranks": 0, "loss_rank_avg": 0.44257092475891113, "step": 2860, "valid_targets_mean": 3811.4, "valid_targets_min": 924 }, { "epoch": 4.584, "grad_norm": 0.7683330781359213, "learning_rate": 8.442494026603709e-07, "loss": 0.4131, "loss_nan_ranks": 0, "loss_rank_avg": 0.44089746475219727, "step": 2865, "valid_targets_mean": 3014.2, "valid_targets_min": 687 }, { "epoch": 4.592, "grad_norm": 0.5846368499398767, "learning_rate": 8.124312330132423e-07, "loss": 0.3811, "loss_nan_ranks": 0, "loss_rank_avg": 0.3351588845252991, "step": 2870, "valid_targets_mean": 3851.7, "valid_targets_min": 677 }, { "epoch": 4.6, "grad_norm": 0.5968786949619281, "learning_rate": 7.812117891225667e-07, "loss": 0.405, "loss_nan_ranks": 0, "loss_rank_avg": 0.3558521866798401, "step": 2875, "valid_targets_mean": 3713.2, "valid_targets_min": 783 }, { "epoch": 4.608, "grad_norm": 0.6212588253374268, "learning_rate": 7.505920451547544e-07, "loss": 0.4055, "loss_nan_ranks": 0, "loss_rank_avg": 0.3719406723976135, "step": 2880, "valid_targets_mean": 3888.3, "valid_targets_min": 516 }, { "epoch": 4.616, "grad_norm": 0.8263222292237222, "learning_rate": 7.205729565632947e-07, "loss": 0.4107, "loss_nan_ranks": 0, "loss_rank_avg": 0.415608674287796, "step": 2885, "valid_targets_mean": 2358.3, "valid_targets_min": 738 }, { "epoch": 4.624, "grad_norm": 1.2742274611700906, "learning_rate": 6.911554600589121e-07, "loss": 0.4135, "loss_nan_ranks": 0, "loss_rank_avg": 0.3934563398361206, "step": 2890, "valid_targets_mean": 4383.9, "valid_targets_min": 621 }, { "epoch": 4.632, "grad_norm": 0.7165845833648357, "learning_rate": 6.62340473580354e-07, "loss": 0.422, "loss_nan_ranks": 0, "loss_rank_avg": 0.46797922253608704, "step": 2895, "valid_targets_mean": 3301.2, "valid_targets_min": 779 }, { "epoch": 4.64, "grad_norm": 0.5652440899945861, "learning_rate": 6.341288962657422e-07, "loss": 0.4163, "loss_nan_ranks": 0, "loss_rank_avg": 0.4029410779476166, "step": 2900, "valid_targets_mean": 4548.8, "valid_targets_min": 447 }, { "epoch": 4.648, "grad_norm": 0.6700159540879693, "learning_rate": 6.06521608424524e-07, "loss": 0.4329, "loss_nan_ranks": 0, "loss_rank_avg": 0.4570663273334503, "step": 2905, "valid_targets_mean": 4093.9, "valid_targets_min": 597 }, { "epoch": 4.656, "grad_norm": 0.6045546990414774, "learning_rate": 5.795194715099905e-07, "loss": 0.3856, "loss_nan_ranks": 0, "loss_rank_avg": 0.4214397966861725, "step": 2910, "valid_targets_mean": 5777.8, "valid_targets_min": 940 }, { "epoch": 4.664, "grad_norm": 0.5952012215364537, "learning_rate": 5.531233280924042e-07, "loss": 0.4219, "loss_nan_ranks": 0, "loss_rank_avg": 0.3828292787075043, "step": 2915, "valid_targets_mean": 4322.4, "valid_targets_min": 615 }, { "epoch": 4.672, "grad_norm": 0.6450876587783286, "learning_rate": 5.273340018327044e-07, "loss": 0.4067, "loss_nan_ranks": 0, "loss_rank_avg": 0.4427468776702881, "step": 2920, "valid_targets_mean": 3636.7, "valid_targets_min": 686 }, { "epoch": 4.68, "grad_norm": 0.8489893578004784, "learning_rate": 5.02152297456806e-07, "loss": 0.4044, "loss_nan_ranks": 0, "loss_rank_avg": 0.44238054752349854, "step": 2925, "valid_targets_mean": 2258.9, "valid_targets_min": 535 }, { "epoch": 4.688, "grad_norm": 0.7104670754151398, "learning_rate": 4.775790007304993e-07, "loss": 0.4145, "loss_nan_ranks": 0, "loss_rank_avg": 0.43017131090164185, "step": 2930, "valid_targets_mean": 3351.2, "valid_targets_min": 573 }, { "epoch": 4.696, "grad_norm": 0.6357385020238071, "learning_rate": 4.5361487843490924e-07, "loss": 0.4049, "loss_nan_ranks": 0, "loss_rank_avg": 0.4254443347454071, "step": 2935, "valid_targets_mean": 4101.7, "valid_targets_min": 683 }, { "epoch": 4.704, "grad_norm": 0.5732449316192012, "learning_rate": 4.3026067834258667e-07, "loss": 0.4204, "loss_nan_ranks": 0, "loss_rank_avg": 0.4033844470977783, "step": 2940, "valid_targets_mean": 4472.9, "valid_targets_min": 540 }, { "epoch": 4.712, "grad_norm": 0.48553171918656174, "learning_rate": 4.0751712919417484e-07, "loss": 0.3781, "loss_nan_ranks": 0, "loss_rank_avg": 0.3926544785499573, "step": 2945, "valid_targets_mean": 6882.5, "valid_targets_min": 682 }, { "epoch": 4.72, "grad_norm": 0.6821340139530365, "learning_rate": 3.853849406756549e-07, "loss": 0.4099, "loss_nan_ranks": 0, "loss_rank_avg": 0.42946845293045044, "step": 2950, "valid_targets_mean": 3391.1, "valid_targets_min": 373 }, { "epoch": 4.728, "grad_norm": 0.6654823260184427, "learning_rate": 3.6386480339621886e-07, "loss": 0.3988, "loss_nan_ranks": 0, "loss_rank_avg": 0.39326146245002747, "step": 2955, "valid_targets_mean": 3489.5, "valid_targets_min": 652 }, { "epoch": 4.736, "grad_norm": 0.6083477077382592, "learning_rate": 3.4295738886670925e-07, "loss": 0.4034, "loss_nan_ranks": 0, "loss_rank_avg": 0.3781413435935974, "step": 2960, "valid_targets_mean": 4359.6, "valid_targets_min": 617 }, { "epoch": 4.744, "grad_norm": 0.6597022939202071, "learning_rate": 3.226633494786668e-07, "loss": 0.3878, "loss_nan_ranks": 0, "loss_rank_avg": 0.3716740012168884, "step": 2965, "valid_targets_mean": 3212.9, "valid_targets_min": 729 }, { "epoch": 4.752, "grad_norm": 0.8113317008052021, "learning_rate": 3.0298331848398033e-07, "loss": 0.4039, "loss_nan_ranks": 0, "loss_rank_avg": 0.4363306760787964, "step": 2970, "valid_targets_mean": 3148.3, "valid_targets_min": 715 }, { "epoch": 4.76, "grad_norm": 0.6243007406816568, "learning_rate": 2.839179099751133e-07, "loss": 0.4149, "loss_nan_ranks": 0, "loss_rank_avg": 0.4543706178665161, "step": 2975, "valid_targets_mean": 4166.6, "valid_targets_min": 866 }, { "epoch": 4.768, "grad_norm": 0.6224022376593623, "learning_rate": 2.654677188659549e-07, "loss": 0.4168, "loss_nan_ranks": 0, "loss_rank_avg": 0.451460063457489, "step": 2980, "valid_targets_mean": 4207.4, "valid_targets_min": 748 }, { "epoch": 4.776, "grad_norm": 0.5374144174742845, "learning_rate": 2.476333208732462e-07, "loss": 0.4053, "loss_nan_ranks": 0, "loss_rank_avg": 0.42308709025382996, "step": 2985, "valid_targets_mean": 6995.9, "valid_targets_min": 780 }, { "epoch": 4.784, "grad_norm": 0.6285382216596294, "learning_rate": 2.3041527249863193e-07, "loss": 0.4221, "loss_nan_ranks": 0, "loss_rank_avg": 0.42085695266723633, "step": 2990, "valid_targets_mean": 3705.4, "valid_targets_min": 915 }, { "epoch": 4.792, "grad_norm": 0.6313424923207209, "learning_rate": 2.1381411101127013e-07, "loss": 0.4242, "loss_nan_ranks": 0, "loss_rank_avg": 0.4452266991138458, "step": 2995, "valid_targets_mean": 4179.6, "valid_targets_min": 654 }, { "epoch": 4.8, "grad_norm": 0.5405302367882336, "learning_rate": 1.9783035443108999e-07, "loss": 0.3761, "loss_nan_ranks": 0, "loss_rank_avg": 0.3892124891281128, "step": 3000, "valid_targets_mean": 4694.6, "valid_targets_min": 726 }, { "epoch": 4.808, "grad_norm": 0.6499699005097503, "learning_rate": 1.8246450151261362e-07, "loss": 0.4126, "loss_nan_ranks": 0, "loss_rank_avg": 0.46307098865509033, "step": 3005, "valid_targets_mean": 4108.7, "valid_targets_min": 734 }, { "epoch": 4.816, "grad_norm": 0.6639498536382377, "learning_rate": 1.6771703172940635e-07, "loss": 0.3848, "loss_nan_ranks": 0, "loss_rank_avg": 0.44725149869918823, "step": 3010, "valid_targets_mean": 3558.9, "valid_targets_min": 663 }, { "epoch": 4.824, "grad_norm": 0.549526064647749, "learning_rate": 1.5358840525909967e-07, "loss": 0.3986, "loss_nan_ranks": 0, "loss_rank_avg": 0.4009050130844116, "step": 3015, "valid_targets_mean": 4778.8, "valid_targets_min": 671 }, { "epoch": 4.832, "grad_norm": 0.7167556183864082, "learning_rate": 1.4007906296904072e-07, "loss": 0.3943, "loss_nan_ranks": 0, "loss_rank_avg": 0.43567198514938354, "step": 3020, "valid_targets_mean": 3120.4, "valid_targets_min": 547 }, { "epoch": 4.84, "grad_norm": 0.7952417316247405, "learning_rate": 1.2718942640254084e-07, "loss": 0.4024, "loss_nan_ranks": 0, "loss_rank_avg": 0.4332461953163147, "step": 3025, "valid_targets_mean": 2783.6, "valid_targets_min": 646 }, { "epoch": 4.848, "grad_norm": 0.5604252297788266, "learning_rate": 1.1491989776570623e-07, "loss": 0.4079, "loss_nan_ranks": 0, "loss_rank_avg": 0.3904341757297516, "step": 3030, "valid_targets_mean": 4534.9, "valid_targets_min": 864 }, { "epoch": 4.856, "grad_norm": 0.6858298477952322, "learning_rate": 1.0327085991490127e-07, "loss": 0.4213, "loss_nan_ranks": 0, "loss_rank_avg": 0.4092717170715332, "step": 3035, "valid_targets_mean": 3204.4, "valid_targets_min": 509 }, { "epoch": 4.864, "grad_norm": 0.6401379988844095, "learning_rate": 9.22426763447981e-08, "loss": 0.4118, "loss_nan_ranks": 0, "loss_rank_avg": 0.41920021176338196, "step": 3040, "valid_targets_mean": 3930.8, "valid_targets_min": 643 }, { "epoch": 4.872, "grad_norm": 0.5334024885638476, "learning_rate": 8.183569117703461e-08, "loss": 0.3955, "loss_nan_ranks": 0, "loss_rank_avg": 0.3883068263530731, "step": 3045, "valid_targets_mean": 5270.0, "valid_targets_min": 816 }, { "epoch": 4.88, "grad_norm": 0.5549772986705793, "learning_rate": 7.205022914946957e-08, "loss": 0.4232, "loss_nan_ranks": 0, "loss_rank_avg": 0.38646113872528076, "step": 3050, "valid_targets_mean": 4886.3, "valid_targets_min": 707 }, { "epoch": 4.888, "grad_norm": 0.5767956457076785, "learning_rate": 6.288659560606203e-08, "loss": 0.4086, "loss_nan_ranks": 0, "loss_rank_avg": 0.3830558657646179, "step": 3055, "valid_targets_mean": 4712.8, "valid_targets_min": 727 }, { "epoch": 4.896, "grad_norm": 0.5988889732664718, "learning_rate": 5.4345076487332114e-08, "loss": 0.3863, "loss_nan_ranks": 0, "loss_rank_avg": 0.4035729169845581, "step": 3060, "valid_targets_mean": 4299.6, "valid_targets_min": 763 }, { "epoch": 4.904, "grad_norm": 0.5205741542745066, "learning_rate": 4.642593832144382e-08, "loss": 0.4224, "loss_nan_ranks": 0, "loss_rank_avg": 0.36331793665885925, "step": 3065, "valid_targets_mean": 4843.8, "valid_targets_min": 939 }, { "epoch": 4.912, "grad_norm": 0.6165591241445428, "learning_rate": 3.912942821589161e-08, "loss": 0.3843, "loss_nan_ranks": 0, "loss_rank_avg": 0.3959740400314331, "step": 3070, "valid_targets_mean": 4223.8, "valid_targets_min": 912 }, { "epoch": 4.92, "grad_norm": 0.6355765004228577, "learning_rate": 3.2455773849779935e-08, "loss": 0.4104, "loss_nan_ranks": 0, "loss_rank_avg": 0.42105889320373535, "step": 3075, "valid_targets_mean": 3869.8, "valid_targets_min": 743 }, { "epoch": 4.928, "grad_norm": 0.7183753779829565, "learning_rate": 2.6405183466731154e-08, "loss": 0.4293, "loss_nan_ranks": 0, "loss_rank_avg": 0.46973997354507446, "step": 3080, "valid_targets_mean": 3456.1, "valid_targets_min": 903 }, { "epoch": 4.936, "grad_norm": 0.665925414774037, "learning_rate": 2.0977845868375145e-08, "loss": 0.4192, "loss_nan_ranks": 0, "loss_rank_avg": 0.4395608603954315, "step": 3085, "valid_targets_mean": 3503.2, "valid_targets_min": 615 }, { "epoch": 4.944, "grad_norm": 0.6663253811911488, "learning_rate": 1.6173930408467376e-08, "loss": 0.4184, "loss_nan_ranks": 0, "loss_rank_avg": 0.4220019578933716, "step": 3090, "valid_targets_mean": 3666.4, "valid_targets_min": 663 }, { "epoch": 4.952, "grad_norm": 0.628895057963846, "learning_rate": 1.199358698759978e-08, "loss": 0.3879, "loss_nan_ranks": 0, "loss_rank_avg": 0.3834680914878845, "step": 3095, "valid_targets_mean": 3802.9, "valid_targets_min": 468 }, { "epoch": 4.96, "grad_norm": 0.6447997763250441, "learning_rate": 8.436946048522298e-09, "loss": 0.4109, "loss_nan_ranks": 0, "loss_rank_avg": 0.4318605065345764, "step": 3100, "valid_targets_mean": 3820.8, "valid_targets_min": 568 }, { "epoch": 4.968, "grad_norm": 0.4941475635121412, "learning_rate": 5.504118572081662e-09, "loss": 0.3983, "loss_nan_ranks": 0, "loss_rank_avg": 0.3983478844165802, "step": 3105, "valid_targets_mean": 6376.6, "valid_targets_min": 924 }, { "epoch": 4.976, "grad_norm": 0.6289386884950056, "learning_rate": 3.1951960737419686e-09, "loss": 0.4222, "loss_nan_ranks": 0, "loss_rank_avg": 0.4331316351890564, "step": 3110, "valid_targets_mean": 4068.7, "valid_targets_min": 763 }, { "epoch": 4.984, "grad_norm": 0.9118066646290699, "learning_rate": 1.5102506007447227e-09, "loss": 0.3988, "loss_nan_ranks": 0, "loss_rank_avg": 0.4566645622253418, "step": 3115, "valid_targets_mean": 2408.6, "valid_targets_min": 734 }, { "epoch": 4.992, "grad_norm": 0.5570389322501734, "learning_rate": 4.493347298528683e-10, "loss": 0.4032, "loss_nan_ranks": 0, "loss_rank_avg": 0.3776412904262543, "step": 3120, "valid_targets_mean": 4442.2, "valid_targets_min": 657 }, { "epoch": 5.0, "grad_norm": 0.6109238323745433, "learning_rate": 1.248156571209691e-11, "loss": 0.3841, "loss_nan_ranks": 0, "loss_rank_avg": 0.37494388222694397, "step": 3125, "valid_targets_mean": 3849.6, "valid_targets_min": 730 }, { "epoch": 5.0, "loss_nan_ranks": 0, "loss_rank_avg": 0.37494388222694397, "step": 3125, "total_flos": 883823926247424.0, "train_loss": 0.468077876701355, "train_runtime": 22577.6779, "train_samples_per_second": 2.212, "train_steps_per_second": 0.138, "valid_targets_mean": 3849.6, "valid_targets_min": 730 } ], "logging_steps": 5, "max_steps": 3125, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 883823926247424.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }