| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 3125, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.008, | |
| "grad_norm": 5.049601892729115, | |
| "learning_rate": 5.111821086261981e-07, | |
| "loss": 0.7278, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43948009610176086, | |
| "step": 5, | |
| "valid_targets_mean": 7209.1, | |
| "valid_targets_min": 1277 | |
| }, | |
| { | |
| "epoch": 0.016, | |
| "grad_norm": 5.277364451906386, | |
| "learning_rate": 1.1501597444089457e-06, | |
| "loss": 0.6908, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30664390325546265, | |
| "step": 10, | |
| "valid_targets_mean": 3595.1, | |
| "valid_targets_min": 964 | |
| }, | |
| { | |
| "epoch": 0.024, | |
| "grad_norm": 3.9698434226273944, | |
| "learning_rate": 1.7891373801916933e-06, | |
| "loss": 0.6463, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3173980712890625, | |
| "step": 15, | |
| "valid_targets_mean": 4277.0, | |
| "valid_targets_min": 1066 | |
| }, | |
| { | |
| "epoch": 0.032, | |
| "grad_norm": 4.147563403598727, | |
| "learning_rate": 2.428115015974441e-06, | |
| "loss": 0.6988, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3903578519821167, | |
| "step": 20, | |
| "valid_targets_mean": 4019.9, | |
| "valid_targets_min": 1172 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 2.579151645965659, | |
| "learning_rate": 3.0670926517571885e-06, | |
| "loss": 0.653, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23339255154132843, | |
| "step": 25, | |
| "valid_targets_mean": 2710.8, | |
| "valid_targets_min": 854 | |
| }, | |
| { | |
| "epoch": 0.048, | |
| "grad_norm": 1.6363825890362744, | |
| "learning_rate": 3.7060702875399364e-06, | |
| "loss": 0.6401, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2710292339324951, | |
| "step": 30, | |
| "valid_targets_mean": 4470.5, | |
| "valid_targets_min": 1069 | |
| }, | |
| { | |
| "epoch": 0.056, | |
| "grad_norm": 1.093070288309869, | |
| "learning_rate": 4.345047923322684e-06, | |
| "loss": 0.5552, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2130659818649292, | |
| "step": 35, | |
| "valid_targets_mean": 2896.5, | |
| "valid_targets_min": 757 | |
| }, | |
| { | |
| "epoch": 0.064, | |
| "grad_norm": 0.8596561845629843, | |
| "learning_rate": 4.984025559105431e-06, | |
| "loss": 0.5892, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3451421856880188, | |
| "step": 40, | |
| "valid_targets_mean": 6655.2, | |
| "valid_targets_min": 1658 | |
| }, | |
| { | |
| "epoch": 0.072, | |
| "grad_norm": 0.720981615728207, | |
| "learning_rate": 5.623003194888179e-06, | |
| "loss": 0.5639, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23500263690948486, | |
| "step": 45, | |
| "valid_targets_mean": 4629.5, | |
| "valid_targets_min": 1128 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 0.7648020352559365, | |
| "learning_rate": 6.261980830670928e-06, | |
| "loss": 0.546, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28623056411743164, | |
| "step": 50, | |
| "valid_targets_mean": 4455.5, | |
| "valid_targets_min": 557 | |
| }, | |
| { | |
| "epoch": 0.088, | |
| "grad_norm": 0.8070597567729144, | |
| "learning_rate": 6.900958466453675e-06, | |
| "loss": 0.5496, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21983392536640167, | |
| "step": 55, | |
| "valid_targets_mean": 2054.8, | |
| "valid_targets_min": 867 | |
| }, | |
| { | |
| "epoch": 0.096, | |
| "grad_norm": 0.5770805999001849, | |
| "learning_rate": 7.5399361022364225e-06, | |
| "loss": 0.5428, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2292506992816925, | |
| "step": 60, | |
| "valid_targets_mean": 4125.5, | |
| "valid_targets_min": 699 | |
| }, | |
| { | |
| "epoch": 0.104, | |
| "grad_norm": 0.5971913839474638, | |
| "learning_rate": 8.17891373801917e-06, | |
| "loss": 0.4938, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20313440263271332, | |
| "step": 65, | |
| "valid_targets_mean": 3079.9, | |
| "valid_targets_min": 756 | |
| }, | |
| { | |
| "epoch": 0.112, | |
| "grad_norm": 0.686250956100844, | |
| "learning_rate": 8.817891373801917e-06, | |
| "loss": 0.5128, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2895665466785431, | |
| "step": 70, | |
| "valid_targets_mean": 2778.6, | |
| "valid_targets_min": 667 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 0.6575289237390317, | |
| "learning_rate": 9.456869009584665e-06, | |
| "loss": 0.5072, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24841192364692688, | |
| "step": 75, | |
| "valid_targets_mean": 3479.6, | |
| "valid_targets_min": 661 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "grad_norm": 0.6049593114695113, | |
| "learning_rate": 1.0095846645367413e-05, | |
| "loss": 0.4851, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2605184018611908, | |
| "step": 80, | |
| "valid_targets_mean": 3057.5, | |
| "valid_targets_min": 1042 | |
| }, | |
| { | |
| "epoch": 0.136, | |
| "grad_norm": 0.594174930790963, | |
| "learning_rate": 1.073482428115016e-05, | |
| "loss": 0.475, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12189821153879166, | |
| "step": 85, | |
| "valid_targets_mean": 2004.9, | |
| "valid_targets_min": 849 | |
| }, | |
| { | |
| "epoch": 0.144, | |
| "grad_norm": 0.6352082822362639, | |
| "learning_rate": 1.1373801916932907e-05, | |
| "loss": 0.4988, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3049646317958832, | |
| "step": 90, | |
| "valid_targets_mean": 3373.8, | |
| "valid_targets_min": 1120 | |
| }, | |
| { | |
| "epoch": 0.152, | |
| "grad_norm": 0.6122762651129019, | |
| "learning_rate": 1.2012779552715656e-05, | |
| "loss": 0.5111, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32920581102371216, | |
| "step": 95, | |
| "valid_targets_mean": 3691.1, | |
| "valid_targets_min": 840 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 0.4919427231068009, | |
| "learning_rate": 1.2651757188498404e-05, | |
| "loss": 0.4916, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16588306427001953, | |
| "step": 100, | |
| "valid_targets_mean": 4110.8, | |
| "valid_targets_min": 665 | |
| }, | |
| { | |
| "epoch": 0.168, | |
| "grad_norm": 0.567123357639076, | |
| "learning_rate": 1.329073482428115e-05, | |
| "loss": 0.5458, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2980887293815613, | |
| "step": 105, | |
| "valid_targets_mean": 4432.4, | |
| "valid_targets_min": 716 | |
| }, | |
| { | |
| "epoch": 0.176, | |
| "grad_norm": 0.5563571872329078, | |
| "learning_rate": 1.39297124600639e-05, | |
| "loss": 0.4754, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30753272771835327, | |
| "step": 110, | |
| "valid_targets_mean": 5548.8, | |
| "valid_targets_min": 1114 | |
| }, | |
| { | |
| "epoch": 0.184, | |
| "grad_norm": 0.5033412953896856, | |
| "learning_rate": 1.4568690095846648e-05, | |
| "loss": 0.4713, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19983811676502228, | |
| "step": 115, | |
| "valid_targets_mean": 3808.8, | |
| "valid_targets_min": 1071 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "grad_norm": 0.5542569365948462, | |
| "learning_rate": 1.5207667731629394e-05, | |
| "loss": 0.4201, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1442127823829651, | |
| "step": 120, | |
| "valid_targets_mean": 1944.8, | |
| "valid_targets_min": 727 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 0.5369920791073214, | |
| "learning_rate": 1.584664536741214e-05, | |
| "loss": 0.4804, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21610140800476074, | |
| "step": 125, | |
| "valid_targets_mean": 4456.5, | |
| "valid_targets_min": 938 | |
| }, | |
| { | |
| "epoch": 0.208, | |
| "grad_norm": 0.5934402873809017, | |
| "learning_rate": 1.648562300319489e-05, | |
| "loss": 0.4488, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21678262948989868, | |
| "step": 130, | |
| "valid_targets_mean": 2704.4, | |
| "valid_targets_min": 1028 | |
| }, | |
| { | |
| "epoch": 0.216, | |
| "grad_norm": 0.6167459028653418, | |
| "learning_rate": 1.712460063897764e-05, | |
| "loss": 0.4635, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26390573382377625, | |
| "step": 135, | |
| "valid_targets_mean": 3041.1, | |
| "valid_targets_min": 690 | |
| }, | |
| { | |
| "epoch": 0.224, | |
| "grad_norm": 0.6057571170803768, | |
| "learning_rate": 1.7763578274760385e-05, | |
| "loss": 0.473, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2703923285007477, | |
| "step": 140, | |
| "valid_targets_mean": 3405.5, | |
| "valid_targets_min": 1388 | |
| }, | |
| { | |
| "epoch": 0.232, | |
| "grad_norm": 0.5964087448191424, | |
| "learning_rate": 1.840255591054313e-05, | |
| "loss": 0.4281, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12820664048194885, | |
| "step": 145, | |
| "valid_targets_mean": 2341.5, | |
| "valid_targets_min": 548 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 0.7944123967649483, | |
| "learning_rate": 1.904153354632588e-05, | |
| "loss": 0.4823, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3033648133277893, | |
| "step": 150, | |
| "valid_targets_mean": 2520.9, | |
| "valid_targets_min": 945 | |
| }, | |
| { | |
| "epoch": 0.248, | |
| "grad_norm": 0.6692216622848669, | |
| "learning_rate": 1.9680511182108627e-05, | |
| "loss": 0.4764, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.352511465549469, | |
| "step": 155, | |
| "valid_targets_mean": 3567.8, | |
| "valid_targets_min": 1011 | |
| }, | |
| { | |
| "epoch": 0.256, | |
| "grad_norm": 0.5976706030380227, | |
| "learning_rate": 2.0319488817891376e-05, | |
| "loss": 0.4391, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25001269578933716, | |
| "step": 160, | |
| "valid_targets_mean": 3181.8, | |
| "valid_targets_min": 1389 | |
| }, | |
| { | |
| "epoch": 0.264, | |
| "grad_norm": 1.0239772828743636, | |
| "learning_rate": 2.0958466453674126e-05, | |
| "loss": 0.446, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16786524653434753, | |
| "step": 165, | |
| "valid_targets_mean": 3835.6, | |
| "valid_targets_min": 1012 | |
| }, | |
| { | |
| "epoch": 0.272, | |
| "grad_norm": 0.42177417682653895, | |
| "learning_rate": 2.1597444089456872e-05, | |
| "loss": 0.4304, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19490358233451843, | |
| "step": 170, | |
| "valid_targets_mean": 6332.6, | |
| "valid_targets_min": 948 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 0.5895166731709086, | |
| "learning_rate": 2.2236421725239618e-05, | |
| "loss": 0.465, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22322815656661987, | |
| "step": 175, | |
| "valid_targets_mean": 3828.6, | |
| "valid_targets_min": 947 | |
| }, | |
| { | |
| "epoch": 0.288, | |
| "grad_norm": 0.6030927972832155, | |
| "learning_rate": 2.2875399361022364e-05, | |
| "loss": 0.3898, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1875823736190796, | |
| "step": 180, | |
| "valid_targets_mean": 3343.1, | |
| "valid_targets_min": 790 | |
| }, | |
| { | |
| "epoch": 0.296, | |
| "grad_norm": 0.45033732473663274, | |
| "learning_rate": 2.3514376996805114e-05, | |
| "loss": 0.4055, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16341853141784668, | |
| "step": 185, | |
| "valid_targets_mean": 4346.8, | |
| "valid_targets_min": 621 | |
| }, | |
| { | |
| "epoch": 0.304, | |
| "grad_norm": 0.6410357135895351, | |
| "learning_rate": 2.415335463258786e-05, | |
| "loss": 0.451, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2382049858570099, | |
| "step": 190, | |
| "valid_targets_mean": 2598.9, | |
| "valid_targets_min": 869 | |
| }, | |
| { | |
| "epoch": 0.312, | |
| "grad_norm": 0.40862470776459403, | |
| "learning_rate": 2.4792332268370606e-05, | |
| "loss": 0.4398, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19033300876617432, | |
| "step": 195, | |
| "valid_targets_mean": 8101.2, | |
| "valid_targets_min": 1425 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 0.4639247345136467, | |
| "learning_rate": 2.543130990415336e-05, | |
| "loss": 0.4575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1774352490901947, | |
| "step": 200, | |
| "valid_targets_mean": 3866.1, | |
| "valid_targets_min": 1002 | |
| }, | |
| { | |
| "epoch": 0.328, | |
| "grad_norm": 0.6045432758950621, | |
| "learning_rate": 2.6070287539936105e-05, | |
| "loss": 0.4629, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20918956398963928, | |
| "step": 205, | |
| "valid_targets_mean": 4064.6, | |
| "valid_targets_min": 907 | |
| }, | |
| { | |
| "epoch": 0.336, | |
| "grad_norm": 0.49705632653844295, | |
| "learning_rate": 2.670926517571885e-05, | |
| "loss": 0.4128, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20427216589450836, | |
| "step": 210, | |
| "valid_targets_mean": 5069.9, | |
| "valid_targets_min": 701 | |
| }, | |
| { | |
| "epoch": 0.344, | |
| "grad_norm": 0.48520489500855396, | |
| "learning_rate": 2.73482428115016e-05, | |
| "loss": 0.4252, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34422576427459717, | |
| "step": 215, | |
| "valid_targets_mean": 7910.2, | |
| "valid_targets_min": 1506 | |
| }, | |
| { | |
| "epoch": 0.352, | |
| "grad_norm": 0.4668215278946658, | |
| "learning_rate": 2.7987220447284347e-05, | |
| "loss": 0.4237, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21643120050430298, | |
| "step": 220, | |
| "valid_targets_mean": 5393.4, | |
| "valid_targets_min": 737 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 0.5943792588027108, | |
| "learning_rate": 2.8626198083067093e-05, | |
| "loss": 0.4247, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13178721070289612, | |
| "step": 225, | |
| "valid_targets_mean": 2101.8, | |
| "valid_targets_min": 858 | |
| }, | |
| { | |
| "epoch": 0.368, | |
| "grad_norm": 0.5517991295062519, | |
| "learning_rate": 2.9265175718849843e-05, | |
| "loss": 0.3864, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14202260971069336, | |
| "step": 230, | |
| "valid_targets_mean": 2987.0, | |
| "valid_targets_min": 814 | |
| }, | |
| { | |
| "epoch": 0.376, | |
| "grad_norm": 0.6080233680136686, | |
| "learning_rate": 2.9904153354632592e-05, | |
| "loss": 0.4359, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2212154120206833, | |
| "step": 235, | |
| "valid_targets_mean": 3522.0, | |
| "valid_targets_min": 1157 | |
| }, | |
| { | |
| "epoch": 0.384, | |
| "grad_norm": 0.7001104625755565, | |
| "learning_rate": 3.054313099041534e-05, | |
| "loss": 0.4193, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32441866397857666, | |
| "step": 240, | |
| "valid_targets_mean": 3398.5, | |
| "valid_targets_min": 700 | |
| }, | |
| { | |
| "epoch": 0.392, | |
| "grad_norm": 0.5240312255698709, | |
| "learning_rate": 3.1182108626198084e-05, | |
| "loss": 0.4328, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23227915167808533, | |
| "step": 245, | |
| "valid_targets_mean": 4712.2, | |
| "valid_targets_min": 666 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.8648496001859342, | |
| "learning_rate": 3.1821086261980834e-05, | |
| "loss": 0.4262, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19384440779685974, | |
| "step": 250, | |
| "valid_targets_mean": 1589.8, | |
| "valid_targets_min": 634 | |
| }, | |
| { | |
| "epoch": 0.408, | |
| "grad_norm": 0.5041946092525469, | |
| "learning_rate": 3.246006389776358e-05, | |
| "loss": 0.3766, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23754949867725372, | |
| "step": 255, | |
| "valid_targets_mean": 5545.6, | |
| "valid_targets_min": 697 | |
| }, | |
| { | |
| "epoch": 0.416, | |
| "grad_norm": 0.741812736773093, | |
| "learning_rate": 3.3099041533546326e-05, | |
| "loss": 0.4342, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19546037912368774, | |
| "step": 260, | |
| "valid_targets_mean": 1842.6, | |
| "valid_targets_min": 753 | |
| }, | |
| { | |
| "epoch": 0.424, | |
| "grad_norm": 0.6249635080823627, | |
| "learning_rate": 3.3738019169329076e-05, | |
| "loss": 0.3958, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3015894591808319, | |
| "step": 265, | |
| "valid_targets_mean": 3354.1, | |
| "valid_targets_min": 688 | |
| }, | |
| { | |
| "epoch": 0.432, | |
| "grad_norm": 0.5157995715206686, | |
| "learning_rate": 3.4376996805111825e-05, | |
| "loss": 0.4649, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23236235976219177, | |
| "step": 270, | |
| "valid_targets_mean": 4596.9, | |
| "valid_targets_min": 1140 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 0.6481359309360338, | |
| "learning_rate": 3.5015974440894575e-05, | |
| "loss": 0.4068, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1774221956729889, | |
| "step": 275, | |
| "valid_targets_mean": 2685.8, | |
| "valid_targets_min": 448 | |
| }, | |
| { | |
| "epoch": 0.448, | |
| "grad_norm": 0.5003694452381771, | |
| "learning_rate": 3.565495207667732e-05, | |
| "loss": 0.3946, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20578652620315552, | |
| "step": 280, | |
| "valid_targets_mean": 5089.4, | |
| "valid_targets_min": 1146 | |
| }, | |
| { | |
| "epoch": 0.456, | |
| "grad_norm": 0.5430128250712549, | |
| "learning_rate": 3.629392971246007e-05, | |
| "loss": 0.4262, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19618654251098633, | |
| "step": 285, | |
| "valid_targets_mean": 2835.6, | |
| "valid_targets_min": 1099 | |
| }, | |
| { | |
| "epoch": 0.464, | |
| "grad_norm": 0.5889443716013671, | |
| "learning_rate": 3.6932907348242816e-05, | |
| "loss": 0.4326, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26432836055755615, | |
| "step": 290, | |
| "valid_targets_mean": 4703.8, | |
| "valid_targets_min": 1265 | |
| }, | |
| { | |
| "epoch": 0.472, | |
| "grad_norm": 0.5008891357690028, | |
| "learning_rate": 3.757188498402556e-05, | |
| "loss": 0.4477, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28413641452789307, | |
| "step": 295, | |
| "valid_targets_mean": 6156.9, | |
| "valid_targets_min": 1117 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 0.45277012879942824, | |
| "learning_rate": 3.821086261980831e-05, | |
| "loss": 0.4005, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2018493264913559, | |
| "step": 300, | |
| "valid_targets_mean": 6251.2, | |
| "valid_targets_min": 699 | |
| }, | |
| { | |
| "epoch": 0.488, | |
| "grad_norm": 0.5498771344672786, | |
| "learning_rate": 3.884984025559106e-05, | |
| "loss": 0.4067, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20270295441150665, | |
| "step": 305, | |
| "valid_targets_mean": 3713.6, | |
| "valid_targets_min": 426 | |
| }, | |
| { | |
| "epoch": 0.496, | |
| "grad_norm": 0.4849980047118573, | |
| "learning_rate": 3.94888178913738e-05, | |
| "loss": 0.4172, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19320783019065857, | |
| "step": 310, | |
| "valid_targets_mean": 3616.2, | |
| "valid_targets_min": 1182 | |
| }, | |
| { | |
| "epoch": 0.504, | |
| "grad_norm": 0.46547683956214736, | |
| "learning_rate": 3.9999987518434296e-05, | |
| "loss": 0.4113, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16570322215557098, | |
| "step": 315, | |
| "valid_targets_mean": 4545.1, | |
| "valid_targets_min": 894 | |
| }, | |
| { | |
| "epoch": 0.512, | |
| "grad_norm": 0.6433873513411175, | |
| "learning_rate": 3.999955066527015e-05, | |
| "loss": 0.4134, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2329372763633728, | |
| "step": 320, | |
| "valid_targets_mean": 3225.2, | |
| "valid_targets_min": 1171 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 0.48867055877170756, | |
| "learning_rate": 3.999848974939926e-05, | |
| "loss": 0.3981, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11635329574346542, | |
| "step": 325, | |
| "valid_targets_mean": 3431.9, | |
| "valid_targets_min": 713 | |
| }, | |
| { | |
| "epoch": 0.528, | |
| "grad_norm": 1.4894122572179855, | |
| "learning_rate": 3.999680480392626e-05, | |
| "loss": 0.4087, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18833759427070618, | |
| "step": 330, | |
| "valid_targets_mean": 3035.5, | |
| "valid_targets_min": 732 | |
| }, | |
| { | |
| "epoch": 0.536, | |
| "grad_norm": 0.4193239467986382, | |
| "learning_rate": 3.999449588142792e-05, | |
| "loss": 0.4141, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20873858034610748, | |
| "step": 335, | |
| "valid_targets_mean": 5798.9, | |
| "valid_targets_min": 2283 | |
| }, | |
| { | |
| "epoch": 0.544, | |
| "grad_norm": 0.4322322156645337, | |
| "learning_rate": 3.9991563053951476e-05, | |
| "loss": 0.3762, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15418782830238342, | |
| "step": 340, | |
| "valid_targets_mean": 4867.1, | |
| "valid_targets_min": 913 | |
| }, | |
| { | |
| "epoch": 0.552, | |
| "grad_norm": 0.42579176413360825, | |
| "learning_rate": 3.99880064130124e-05, | |
| "loss": 0.3795, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18619948625564575, | |
| "step": 345, | |
| "valid_targets_mean": 5330.1, | |
| "valid_targets_min": 676 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 0.5161509309473619, | |
| "learning_rate": 3.9983826069591535e-05, | |
| "loss": 0.4151, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1781724989414215, | |
| "step": 350, | |
| "valid_targets_mean": 3472.8, | |
| "valid_targets_min": 997 | |
| }, | |
| { | |
| "epoch": 0.568, | |
| "grad_norm": 0.6041811404254598, | |
| "learning_rate": 3.997902215413163e-05, | |
| "loss": 0.3963, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30378806591033936, | |
| "step": 355, | |
| "valid_targets_mean": 3981.2, | |
| "valid_targets_min": 912 | |
| }, | |
| { | |
| "epoch": 0.576, | |
| "grad_norm": 0.6498872075998322, | |
| "learning_rate": 3.997359481653327e-05, | |
| "loss": 0.407, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20173442363739014, | |
| "step": 360, | |
| "valid_targets_mean": 2788.2, | |
| "valid_targets_min": 878 | |
| }, | |
| { | |
| "epoch": 0.584, | |
| "grad_norm": 0.5061612683016131, | |
| "learning_rate": 3.996754422615023e-05, | |
| "loss": 0.3623, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22666072845458984, | |
| "step": 365, | |
| "valid_targets_mean": 4118.0, | |
| "valid_targets_min": 1373 | |
| }, | |
| { | |
| "epoch": 0.592, | |
| "grad_norm": 0.5361445261038215, | |
| "learning_rate": 3.996087057178411e-05, | |
| "loss": 0.4507, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19703161716461182, | |
| "step": 370, | |
| "valid_targets_mean": 3619.8, | |
| "valid_targets_min": 582 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 0.672527479458296, | |
| "learning_rate": 3.995357406167856e-05, | |
| "loss": 0.4195, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2972131371498108, | |
| "step": 375, | |
| "valid_targets_mean": 3516.8, | |
| "valid_targets_min": 874 | |
| }, | |
| { | |
| "epoch": 0.608, | |
| "grad_norm": 0.44549432799665417, | |
| "learning_rate": 3.994565492351267e-05, | |
| "loss": 0.4056, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17287719249725342, | |
| "step": 380, | |
| "valid_targets_mean": 5475.5, | |
| "valid_targets_min": 1842 | |
| }, | |
| { | |
| "epoch": 0.616, | |
| "grad_norm": 0.6778440222156983, | |
| "learning_rate": 3.993711340439394e-05, | |
| "loss": 0.4242, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21647445857524872, | |
| "step": 385, | |
| "valid_targets_mean": 3971.5, | |
| "valid_targets_min": 1585 | |
| }, | |
| { | |
| "epoch": 0.624, | |
| "grad_norm": 0.4459566145405042, | |
| "learning_rate": 3.9927949770850535e-05, | |
| "loss": 0.3925, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1151498481631279, | |
| "step": 390, | |
| "valid_targets_mean": 2951.2, | |
| "valid_targets_min": 1088 | |
| }, | |
| { | |
| "epoch": 0.632, | |
| "grad_norm": 0.48331678470692757, | |
| "learning_rate": 3.991816430882297e-05, | |
| "loss": 0.3602, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2142452448606491, | |
| "step": 395, | |
| "valid_targets_mean": 4534.9, | |
| "valid_targets_min": 711 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 0.5127689050837395, | |
| "learning_rate": 3.9907757323655206e-05, | |
| "loss": 0.4035, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23935633897781372, | |
| "step": 400, | |
| "valid_targets_mean": 4701.5, | |
| "valid_targets_min": 598 | |
| }, | |
| { | |
| "epoch": 0.648, | |
| "grad_norm": 0.4843857269179256, | |
| "learning_rate": 3.98967291400851e-05, | |
| "loss": 0.3784, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25847354531288147, | |
| "step": 405, | |
| "valid_targets_mean": 5699.5, | |
| "valid_targets_min": 879 | |
| }, | |
| { | |
| "epoch": 0.656, | |
| "grad_norm": 0.5005998339687705, | |
| "learning_rate": 3.98850801022343e-05, | |
| "loss": 0.3906, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1377241611480713, | |
| "step": 410, | |
| "valid_targets_mean": 2504.0, | |
| "valid_targets_min": 740 | |
| }, | |
| { | |
| "epoch": 0.664, | |
| "grad_norm": 0.4717110410055623, | |
| "learning_rate": 3.987281057359746e-05, | |
| "loss": 0.4076, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20272189378738403, | |
| "step": 415, | |
| "valid_targets_mean": 5388.0, | |
| "valid_targets_min": 1322 | |
| }, | |
| { | |
| "epoch": 0.672, | |
| "grad_norm": 0.5915926837830854, | |
| "learning_rate": 3.985992093703096e-05, | |
| "loss": 0.4205, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16994090378284454, | |
| "step": 420, | |
| "valid_targets_mean": 2121.0, | |
| "valid_targets_min": 784 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 0.43255266985931967, | |
| "learning_rate": 3.98464115947409e-05, | |
| "loss": 0.3954, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1824256181716919, | |
| "step": 425, | |
| "valid_targets_mean": 5472.2, | |
| "valid_targets_min": 1140 | |
| }, | |
| { | |
| "epoch": 0.688, | |
| "grad_norm": 0.49054534785016624, | |
| "learning_rate": 3.9832282968270595e-05, | |
| "loss": 0.4021, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1873103380203247, | |
| "step": 430, | |
| "valid_targets_mean": 3608.4, | |
| "valid_targets_min": 735 | |
| }, | |
| { | |
| "epoch": 0.696, | |
| "grad_norm": 0.5313053332029032, | |
| "learning_rate": 3.9817535498487385e-05, | |
| "loss": 0.4255, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2174983024597168, | |
| "step": 435, | |
| "valid_targets_mean": 3903.1, | |
| "valid_targets_min": 773 | |
| }, | |
| { | |
| "epoch": 0.704, | |
| "grad_norm": 0.5660976015817243, | |
| "learning_rate": 3.980216964556892e-05, | |
| "loss": 0.4006, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2702789306640625, | |
| "step": 440, | |
| "valid_targets_mean": 5340.2, | |
| "valid_targets_min": 2464 | |
| }, | |
| { | |
| "epoch": 0.712, | |
| "grad_norm": 0.3583060517799168, | |
| "learning_rate": 3.978618588898873e-05, | |
| "loss": 0.3687, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17784491181373596, | |
| "step": 445, | |
| "valid_targets_mean": 5914.0, | |
| "valid_targets_min": 1108 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 0.5085712449981009, | |
| "learning_rate": 3.976958472750137e-05, | |
| "loss": 0.415, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17204846441745758, | |
| "step": 450, | |
| "valid_targets_mean": 2789.6, | |
| "valid_targets_min": 1260 | |
| }, | |
| { | |
| "epoch": 0.728, | |
| "grad_norm": 0.5391324690732857, | |
| "learning_rate": 3.9752366679126754e-05, | |
| "loss": 0.4117, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23437952995300293, | |
| "step": 455, | |
| "valid_targets_mean": 3562.9, | |
| "valid_targets_min": 952 | |
| }, | |
| { | |
| "epoch": 0.736, | |
| "grad_norm": 0.5567986016900565, | |
| "learning_rate": 3.973453228113405e-05, | |
| "loss": 0.4096, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22452281415462494, | |
| "step": 460, | |
| "valid_targets_mean": 3349.6, | |
| "valid_targets_min": 623 | |
| }, | |
| { | |
| "epoch": 0.744, | |
| "grad_norm": 0.5058714202986386, | |
| "learning_rate": 3.971608209002489e-05, | |
| "loss": 0.4383, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26416462659835815, | |
| "step": 465, | |
| "valid_targets_mean": 4220.1, | |
| "valid_targets_min": 1122 | |
| }, | |
| { | |
| "epoch": 0.752, | |
| "grad_norm": 0.5026494486558096, | |
| "learning_rate": 3.969701668151603e-05, | |
| "loss": 0.3986, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16783612966537476, | |
| "step": 470, | |
| "valid_targets_mean": 3548.6, | |
| "valid_targets_min": 787 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 0.5620692640215499, | |
| "learning_rate": 3.9677336650521336e-05, | |
| "loss": 0.3936, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23793232440948486, | |
| "step": 475, | |
| "valid_targets_mean": 4302.1, | |
| "valid_targets_min": 857 | |
| }, | |
| { | |
| "epoch": 0.768, | |
| "grad_norm": 0.5022492717127841, | |
| "learning_rate": 3.9657042611133294e-05, | |
| "loss": 0.4374, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27741798758506775, | |
| "step": 480, | |
| "valid_targets_mean": 5250.4, | |
| "valid_targets_min": 1286 | |
| }, | |
| { | |
| "epoch": 0.776, | |
| "grad_norm": 0.46269173482413795, | |
| "learning_rate": 3.963613519660379e-05, | |
| "loss": 0.4168, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2027072012424469, | |
| "step": 485, | |
| "valid_targets_mean": 5004.6, | |
| "valid_targets_min": 613 | |
| }, | |
| { | |
| "epoch": 0.784, | |
| "grad_norm": 0.5741596915736422, | |
| "learning_rate": 3.961461505932435e-05, | |
| "loss": 0.4089, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20953507721424103, | |
| "step": 490, | |
| "valid_targets_mean": 4452.9, | |
| "valid_targets_min": 872 | |
| }, | |
| { | |
| "epoch": 0.792, | |
| "grad_norm": 0.4907391959582943, | |
| "learning_rate": 3.959248287080583e-05, | |
| "loss": 0.4368, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16955000162124634, | |
| "step": 495, | |
| "valid_targets_mean": 3555.2, | |
| "valid_targets_min": 371 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.6031193676144202, | |
| "learning_rate": 3.9569739321657416e-05, | |
| "loss": 0.3877, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1926141083240509, | |
| "step": 500, | |
| "valid_targets_mean": 2296.8, | |
| "valid_targets_min": 895 | |
| }, | |
| { | |
| "epoch": 0.808, | |
| "grad_norm": 0.5937397695685496, | |
| "learning_rate": 3.9546385121565095e-05, | |
| "loss": 0.407, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2059236317873001, | |
| "step": 505, | |
| "valid_targets_mean": 2328.5, | |
| "valid_targets_min": 875 | |
| }, | |
| { | |
| "epoch": 0.816, | |
| "grad_norm": 0.44462295687513187, | |
| "learning_rate": 3.952242099926951e-05, | |
| "loss": 0.39, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15669041872024536, | |
| "step": 510, | |
| "valid_targets_mean": 4059.2, | |
| "valid_targets_min": 896 | |
| }, | |
| { | |
| "epoch": 0.824, | |
| "grad_norm": 0.4975667663636538, | |
| "learning_rate": 3.9497847702543196e-05, | |
| "loss": 0.4132, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2388497292995453, | |
| "step": 515, | |
| "valid_targets_mean": 4820.4, | |
| "valid_targets_min": 797 | |
| }, | |
| { | |
| "epoch": 0.832, | |
| "grad_norm": 0.5711830488685984, | |
| "learning_rate": 3.94726659981673e-05, | |
| "loss": 0.4272, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1668613851070404, | |
| "step": 520, | |
| "valid_targets_mean": 2210.6, | |
| "valid_targets_min": 1146 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 0.5110974517075352, | |
| "learning_rate": 3.94468766719076e-05, | |
| "loss": 0.3883, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15008708834648132, | |
| "step": 525, | |
| "valid_targets_mean": 2796.0, | |
| "valid_targets_min": 706 | |
| }, | |
| { | |
| "epoch": 0.848, | |
| "grad_norm": 0.6082548647418597, | |
| "learning_rate": 3.942048052849001e-05, | |
| "loss": 0.4015, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20496749877929688, | |
| "step": 530, | |
| "valid_targets_mean": 3376.1, | |
| "valid_targets_min": 727 | |
| }, | |
| { | |
| "epoch": 0.856, | |
| "grad_norm": 0.5881521119749641, | |
| "learning_rate": 3.939347839157548e-05, | |
| "loss": 0.4042, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1840178519487381, | |
| "step": 535, | |
| "valid_targets_mean": 3227.4, | |
| "valid_targets_min": 523 | |
| }, | |
| { | |
| "epoch": 0.864, | |
| "grad_norm": 0.40438864628986093, | |
| "learning_rate": 3.9365871103734264e-05, | |
| "loss": 0.3748, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14281845092773438, | |
| "step": 540, | |
| "valid_targets_mean": 4520.6, | |
| "valid_targets_min": 1131 | |
| }, | |
| { | |
| "epoch": 0.872, | |
| "grad_norm": 0.43619871599488097, | |
| "learning_rate": 3.933765952641965e-05, | |
| "loss": 0.4081, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11543935537338257, | |
| "step": 545, | |
| "valid_targets_mean": 2299.2, | |
| "valid_targets_min": 847 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 0.6284680735434611, | |
| "learning_rate": 3.930884453994109e-05, | |
| "loss": 0.3934, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2154695689678192, | |
| "step": 550, | |
| "valid_targets_mean": 2253.1, | |
| "valid_targets_min": 1014 | |
| }, | |
| { | |
| "epoch": 0.888, | |
| "grad_norm": 0.534153789789492, | |
| "learning_rate": 3.9279427043436706e-05, | |
| "loss": 0.4357, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16836689412593842, | |
| "step": 555, | |
| "valid_targets_mean": 2522.4, | |
| "valid_targets_min": 866 | |
| }, | |
| { | |
| "epoch": 0.896, | |
| "grad_norm": 0.42996929538088907, | |
| "learning_rate": 3.924940795484525e-05, | |
| "loss": 0.3961, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1445537656545639, | |
| "step": 560, | |
| "valid_targets_mean": 3909.5, | |
| "valid_targets_min": 779 | |
| }, | |
| { | |
| "epoch": 0.904, | |
| "grad_norm": 0.5487891975426903, | |
| "learning_rate": 3.9218788210877436e-05, | |
| "loss": 0.4047, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17581608891487122, | |
| "step": 565, | |
| "valid_targets_mean": 2792.2, | |
| "valid_targets_min": 780 | |
| }, | |
| { | |
| "epoch": 0.912, | |
| "grad_norm": 0.8309295578756681, | |
| "learning_rate": 3.918756876698676e-05, | |
| "loss": 0.4498, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24004212021827698, | |
| "step": 570, | |
| "valid_targets_mean": 2094.1, | |
| "valid_targets_min": 800 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 0.5731658303042831, | |
| "learning_rate": 3.9155750597339634e-05, | |
| "loss": 0.4248, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23413121700286865, | |
| "step": 575, | |
| "valid_targets_mean": 2952.8, | |
| "valid_targets_min": 740 | |
| }, | |
| { | |
| "epoch": 0.928, | |
| "grad_norm": 0.6857186460842621, | |
| "learning_rate": 3.912333469478502e-05, | |
| "loss": 0.4148, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24854609370231628, | |
| "step": 580, | |
| "valid_targets_mean": 3259.5, | |
| "valid_targets_min": 1202 | |
| }, | |
| { | |
| "epoch": 0.936, | |
| "grad_norm": 0.46414179194452654, | |
| "learning_rate": 3.909032207082344e-05, | |
| "loss": 0.3897, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19742190837860107, | |
| "step": 585, | |
| "valid_targets_mean": 4621.4, | |
| "valid_targets_min": 1137 | |
| }, | |
| { | |
| "epoch": 0.944, | |
| "grad_norm": 0.5432959658650993, | |
| "learning_rate": 3.90567137555754e-05, | |
| "loss": 0.3952, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18072067201137543, | |
| "step": 590, | |
| "valid_targets_mean": 2764.4, | |
| "valid_targets_min": 610 | |
| }, | |
| { | |
| "epoch": 0.952, | |
| "grad_norm": 0.513616096435003, | |
| "learning_rate": 3.9022510797749286e-05, | |
| "loss": 0.4508, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28625935316085815, | |
| "step": 595, | |
| "valid_targets_mean": 4943.6, | |
| "valid_targets_min": 1006 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.4135402289886326, | |
| "learning_rate": 3.898771426460859e-05, | |
| "loss": 0.3864, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1969718635082245, | |
| "step": 600, | |
| "valid_targets_mean": 4979.9, | |
| "valid_targets_min": 971 | |
| }, | |
| { | |
| "epoch": 0.968, | |
| "grad_norm": 0.42359538658922197, | |
| "learning_rate": 3.8952325241938635e-05, | |
| "loss": 0.4183, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1678941547870636, | |
| "step": 605, | |
| "valid_targets_mean": 4025.1, | |
| "valid_targets_min": 1413 | |
| }, | |
| { | |
| "epoch": 0.976, | |
| "grad_norm": 0.437069204077118, | |
| "learning_rate": 3.8916344834012695e-05, | |
| "loss": 0.3807, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18256065249443054, | |
| "step": 610, | |
| "valid_targets_mean": 4092.8, | |
| "valid_targets_min": 722 | |
| }, | |
| { | |
| "epoch": 0.984, | |
| "grad_norm": 0.3981270298976202, | |
| "learning_rate": 3.887977416355754e-05, | |
| "loss": 0.3837, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.213514506816864, | |
| "step": 615, | |
| "valid_targets_mean": 5298.8, | |
| "valid_targets_min": 1094 | |
| }, | |
| { | |
| "epoch": 0.992, | |
| "grad_norm": 0.39948471020655046, | |
| "learning_rate": 3.884261437171838e-05, | |
| "loss": 0.3919, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18268711864948273, | |
| "step": 620, | |
| "valid_targets_mean": 5262.4, | |
| "valid_targets_min": 771 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.4218730892895318, | |
| "learning_rate": 3.8804866618023284e-05, | |
| "loss": 0.3663, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2073502242565155, | |
| "step": 625, | |
| "valid_targets_mean": 6279.8, | |
| "valid_targets_min": 665 | |
| }, | |
| { | |
| "epoch": 1.008, | |
| "grad_norm": 0.4006188688474502, | |
| "learning_rate": 3.876653208034698e-05, | |
| "loss": 0.375, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15046511590480804, | |
| "step": 630, | |
| "valid_targets_mean": 4694.0, | |
| "valid_targets_min": 572 | |
| }, | |
| { | |
| "epoch": 1.016, | |
| "grad_norm": 0.5023669209376572, | |
| "learning_rate": 3.8727611954874114e-05, | |
| "loss": 0.4108, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19494599103927612, | |
| "step": 635, | |
| "valid_targets_mean": 4312.2, | |
| "valid_targets_min": 1605 | |
| }, | |
| { | |
| "epoch": 1.024, | |
| "grad_norm": 0.5415179452211821, | |
| "learning_rate": 3.8688107456061904e-05, | |
| "loss": 0.3649, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16564016044139862, | |
| "step": 640, | |
| "valid_targets_mean": 5437.8, | |
| "valid_targets_min": 950 | |
| }, | |
| { | |
| "epoch": 1.032, | |
| "grad_norm": 0.5250999668731369, | |
| "learning_rate": 3.864801981660227e-05, | |
| "loss": 0.3787, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16773557662963867, | |
| "step": 645, | |
| "valid_targets_mean": 2591.0, | |
| "valid_targets_min": 896 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 0.4792499299558008, | |
| "learning_rate": 3.860735028738337e-05, | |
| "loss": 0.3879, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11868780851364136, | |
| "step": 650, | |
| "valid_targets_mean": 2843.8, | |
| "valid_targets_min": 667 | |
| }, | |
| { | |
| "epoch": 1.048, | |
| "grad_norm": 0.5931956749749214, | |
| "learning_rate": 3.856610013745051e-05, | |
| "loss": 0.3869, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17905378341674805, | |
| "step": 655, | |
| "valid_targets_mean": 2138.0, | |
| "valid_targets_min": 811 | |
| }, | |
| { | |
| "epoch": 1.056, | |
| "grad_norm": 0.5070089472539886, | |
| "learning_rate": 3.852427065396665e-05, | |
| "loss": 0.3597, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21618938446044922, | |
| "step": 660, | |
| "valid_targets_mean": 3818.6, | |
| "valid_targets_min": 874 | |
| }, | |
| { | |
| "epoch": 1.064, | |
| "grad_norm": 0.5277258256022489, | |
| "learning_rate": 3.848186314217213e-05, | |
| "loss": 0.3832, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20905262231826782, | |
| "step": 665, | |
| "valid_targets_mean": 5543.8, | |
| "valid_targets_min": 1842 | |
| }, | |
| { | |
| "epoch": 1.072, | |
| "grad_norm": 0.45992756895656156, | |
| "learning_rate": 3.843887892534402e-05, | |
| "loss": 0.3628, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14996416866779327, | |
| "step": 670, | |
| "valid_targets_mean": 3032.8, | |
| "valid_targets_min": 865 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 0.43582299518949036, | |
| "learning_rate": 3.8395319344754776e-05, | |
| "loss": 0.3695, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14619383215904236, | |
| "step": 675, | |
| "valid_targets_mean": 4816.9, | |
| "valid_targets_min": 678 | |
| }, | |
| { | |
| "epoch": 1.088, | |
| "grad_norm": 0.47992188401849384, | |
| "learning_rate": 3.8351185759630435e-05, | |
| "loss": 0.3989, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17877763509750366, | |
| "step": 680, | |
| "valid_targets_mean": 4263.5, | |
| "valid_targets_min": 586 | |
| }, | |
| { | |
| "epoch": 1.096, | |
| "grad_norm": 0.5853943377609927, | |
| "learning_rate": 3.830647954710816e-05, | |
| "loss": 0.3652, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09830217808485031, | |
| "step": 685, | |
| "valid_targets_mean": 1025.2, | |
| "valid_targets_min": 527 | |
| }, | |
| { | |
| "epoch": 1.104, | |
| "grad_norm": 0.5453115260739528, | |
| "learning_rate": 3.826120210219331e-05, | |
| "loss": 0.4072, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19349700212478638, | |
| "step": 690, | |
| "valid_targets_mean": 3093.0, | |
| "valid_targets_min": 754 | |
| }, | |
| { | |
| "epoch": 1.112, | |
| "grad_norm": 0.37877838490425225, | |
| "learning_rate": 3.8215354837715836e-05, | |
| "loss": 0.3834, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12431143969297409, | |
| "step": 695, | |
| "valid_targets_mean": 5127.8, | |
| "valid_targets_min": 707 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "grad_norm": 0.6610820231714544, | |
| "learning_rate": 3.816893918428631e-05, | |
| "loss": 0.3786, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18000784516334534, | |
| "step": 700, | |
| "valid_targets_mean": 2055.1, | |
| "valid_targets_min": 688 | |
| }, | |
| { | |
| "epoch": 1.1280000000000001, | |
| "grad_norm": 0.5300125572424695, | |
| "learning_rate": 3.8121956590251153e-05, | |
| "loss": 0.4069, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1879684329032898, | |
| "step": 705, | |
| "valid_targets_mean": 3468.1, | |
| "valid_targets_min": 786 | |
| }, | |
| { | |
| "epoch": 1.1360000000000001, | |
| "grad_norm": 0.44683055917689773, | |
| "learning_rate": 3.8074408521647576e-05, | |
| "loss": 0.3836, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20903831720352173, | |
| "step": 710, | |
| "valid_targets_mean": 5525.9, | |
| "valid_targets_min": 422 | |
| }, | |
| { | |
| "epoch": 1.144, | |
| "grad_norm": 0.5102572350380875, | |
| "learning_rate": 3.802629646215771e-05, | |
| "loss": 0.3792, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17337819933891296, | |
| "step": 715, | |
| "valid_targets_mean": 4684.0, | |
| "valid_targets_min": 1435 | |
| }, | |
| { | |
| "epoch": 1.152, | |
| "grad_norm": 0.5765523983200641, | |
| "learning_rate": 3.79776219130624e-05, | |
| "loss": 0.3577, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1805480420589447, | |
| "step": 720, | |
| "valid_targets_mean": 2605.1, | |
| "valid_targets_min": 1231 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "grad_norm": 0.4897830077820699, | |
| "learning_rate": 3.792838639319431e-05, | |
| "loss": 0.3629, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19262930750846863, | |
| "step": 725, | |
| "valid_targets_mean": 3851.5, | |
| "valid_targets_min": 1736 | |
| }, | |
| { | |
| "epoch": 1.168, | |
| "grad_norm": 0.4322207849895438, | |
| "learning_rate": 3.787859143889054e-05, | |
| "loss": 0.3539, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23677626252174377, | |
| "step": 730, | |
| "valid_targets_mean": 6349.4, | |
| "valid_targets_min": 1935 | |
| }, | |
| { | |
| "epoch": 1.176, | |
| "grad_norm": 0.5128262395764999, | |
| "learning_rate": 3.782823860394469e-05, | |
| "loss": 0.3568, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20344534516334534, | |
| "step": 735, | |
| "valid_targets_mean": 3306.5, | |
| "valid_targets_min": 1146 | |
| }, | |
| { | |
| "epoch": 1.184, | |
| "grad_norm": 0.574030810014404, | |
| "learning_rate": 3.777732945955841e-05, | |
| "loss": 0.39, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2069251835346222, | |
| "step": 740, | |
| "valid_targets_mean": 3683.5, | |
| "valid_targets_min": 823 | |
| }, | |
| { | |
| "epoch": 1.192, | |
| "grad_norm": 0.5501142652379903, | |
| "learning_rate": 3.772586559429229e-05, | |
| "loss": 0.359, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26781395077705383, | |
| "step": 745, | |
| "valid_targets_mean": 4816.9, | |
| "valid_targets_min": 1250 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 0.4653456781818237, | |
| "learning_rate": 3.767384861401636e-05, | |
| "loss": 0.3919, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1659979671239853, | |
| "step": 750, | |
| "valid_targets_mean": 4349.2, | |
| "valid_targets_min": 847 | |
| }, | |
| { | |
| "epoch": 1.208, | |
| "grad_norm": 0.38274296561169047, | |
| "learning_rate": 3.762128014185998e-05, | |
| "loss": 0.3675, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2027568817138672, | |
| "step": 755, | |
| "valid_targets_mean": 7722.0, | |
| "valid_targets_min": 1045 | |
| }, | |
| { | |
| "epoch": 1.216, | |
| "grad_norm": 0.9407563287769353, | |
| "learning_rate": 3.7568161818161135e-05, | |
| "loss": 0.3896, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1349801868200302, | |
| "step": 760, | |
| "valid_targets_mean": 2931.2, | |
| "valid_targets_min": 690 | |
| }, | |
| { | |
| "epoch": 1.224, | |
| "grad_norm": 0.5295635911819686, | |
| "learning_rate": 3.751449530041532e-05, | |
| "loss": 0.3771, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17271582782268524, | |
| "step": 765, | |
| "valid_targets_mean": 3066.4, | |
| "valid_targets_min": 422 | |
| }, | |
| { | |
| "epoch": 1.232, | |
| "grad_norm": 0.543101523869541, | |
| "learning_rate": 3.7460282263223764e-05, | |
| "loss": 0.3767, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17287389934062958, | |
| "step": 770, | |
| "valid_targets_mean": 2902.2, | |
| "valid_targets_min": 1035 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 0.5204988467550381, | |
| "learning_rate": 3.740552439824122e-05, | |
| "loss": 0.3991, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19387352466583252, | |
| "step": 775, | |
| "valid_targets_mean": 3014.8, | |
| "valid_targets_min": 789 | |
| }, | |
| { | |
| "epoch": 1.248, | |
| "grad_norm": 0.5793118660838545, | |
| "learning_rate": 3.735022341412314e-05, | |
| "loss": 0.352, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1868707835674286, | |
| "step": 780, | |
| "valid_targets_mean": 2748.2, | |
| "valid_targets_min": 1103 | |
| }, | |
| { | |
| "epoch": 1.256, | |
| "grad_norm": 0.49312789125401785, | |
| "learning_rate": 3.7294381036472386e-05, | |
| "loss": 0.3778, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14733919501304626, | |
| "step": 785, | |
| "valid_targets_mean": 3287.6, | |
| "valid_targets_min": 984 | |
| }, | |
| { | |
| "epoch": 1.264, | |
| "grad_norm": 0.41240222215312733, | |
| "learning_rate": 3.723799900778538e-05, | |
| "loss": 0.3789, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.126112163066864, | |
| "step": 790, | |
| "valid_targets_mean": 3915.2, | |
| "valid_targets_min": 767 | |
| }, | |
| { | |
| "epoch": 1.272, | |
| "grad_norm": 0.4390287545765749, | |
| "learning_rate": 3.7181079087397705e-05, | |
| "loss": 0.3514, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15731275081634521, | |
| "step": 795, | |
| "valid_targets_mean": 4579.5, | |
| "valid_targets_min": 1110 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 0.5857426000892533, | |
| "learning_rate": 3.712362305142926e-05, | |
| "loss": 0.382, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2865251302719116, | |
| "step": 800, | |
| "valid_targets_mean": 4268.1, | |
| "valid_targets_min": 903 | |
| }, | |
| { | |
| "epoch": 1.288, | |
| "grad_norm": 0.5522244993785961, | |
| "learning_rate": 3.706563269272878e-05, | |
| "loss": 0.4019, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1548284888267517, | |
| "step": 805, | |
| "valid_targets_mean": 5253.2, | |
| "valid_targets_min": 958 | |
| }, | |
| { | |
| "epoch": 1.296, | |
| "grad_norm": 0.49707542029017, | |
| "learning_rate": 3.700710982081794e-05, | |
| "loss": 0.3604, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1068187803030014, | |
| "step": 810, | |
| "valid_targets_mean": 2315.8, | |
| "valid_targets_min": 1005 | |
| }, | |
| { | |
| "epoch": 1.304, | |
| "grad_norm": 0.4597650989348005, | |
| "learning_rate": 3.694805626183486e-05, | |
| "loss": 0.3419, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1940535604953766, | |
| "step": 815, | |
| "valid_targets_mean": 6227.8, | |
| "valid_targets_min": 1092 | |
| }, | |
| { | |
| "epoch": 1.312, | |
| "grad_norm": 0.4807446238755427, | |
| "learning_rate": 3.688847385847711e-05, | |
| "loss": 0.3648, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2306816130876541, | |
| "step": 820, | |
| "valid_targets_mean": 4811.5, | |
| "valid_targets_min": 888 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 0.37954247976442135, | |
| "learning_rate": 3.682836446994428e-05, | |
| "loss": 0.355, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11209504306316376, | |
| "step": 825, | |
| "valid_targets_mean": 4343.1, | |
| "valid_targets_min": 554 | |
| }, | |
| { | |
| "epoch": 1.328, | |
| "grad_norm": 0.6968928370006656, | |
| "learning_rate": 3.676772997187989e-05, | |
| "loss": 0.4238, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1730729341506958, | |
| "step": 830, | |
| "valid_targets_mean": 2439.0, | |
| "valid_targets_min": 598 | |
| }, | |
| { | |
| "epoch": 1.336, | |
| "grad_norm": 0.5644540602579898, | |
| "learning_rate": 3.670657225631289e-05, | |
| "loss": 0.3816, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2261945605278015, | |
| "step": 835, | |
| "valid_targets_mean": 3144.8, | |
| "valid_targets_min": 682 | |
| }, | |
| { | |
| "epoch": 1.3439999999999999, | |
| "grad_norm": 0.4518440551193136, | |
| "learning_rate": 3.6644893231598635e-05, | |
| "loss": 0.3949, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13814306259155273, | |
| "step": 840, | |
| "valid_targets_mean": 4653.5, | |
| "valid_targets_min": 595 | |
| }, | |
| { | |
| "epoch": 1.3519999999999999, | |
| "grad_norm": 0.5278282431174571, | |
| "learning_rate": 3.658269482235932e-05, | |
| "loss": 0.3715, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25394973158836365, | |
| "step": 845, | |
| "valid_targets_mean": 3977.8, | |
| "valid_targets_min": 1029 | |
| }, | |
| { | |
| "epoch": 1.3599999999999999, | |
| "grad_norm": 0.4752219437628507, | |
| "learning_rate": 3.651997896942394e-05, | |
| "loss": 0.3455, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12458296865224838, | |
| "step": 850, | |
| "valid_targets_mean": 3616.1, | |
| "valid_targets_min": 596 | |
| }, | |
| { | |
| "epoch": 1.3679999999999999, | |
| "grad_norm": 0.5099738101411846, | |
| "learning_rate": 3.645674762976769e-05, | |
| "loss": 0.3951, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22577358782291412, | |
| "step": 855, | |
| "valid_targets_mean": 4207.6, | |
| "valid_targets_min": 1149 | |
| }, | |
| { | |
| "epoch": 1.376, | |
| "grad_norm": 0.38904671121083256, | |
| "learning_rate": 3.639300277645096e-05, | |
| "loss": 0.358, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13607583940029144, | |
| "step": 860, | |
| "valid_targets_mean": 4714.9, | |
| "valid_targets_min": 1457 | |
| }, | |
| { | |
| "epoch": 1.384, | |
| "grad_norm": 0.3729523812901352, | |
| "learning_rate": 3.6328746398557715e-05, | |
| "loss": 0.3478, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1809740662574768, | |
| "step": 865, | |
| "valid_targets_mean": 5782.2, | |
| "valid_targets_min": 965 | |
| }, | |
| { | |
| "epoch": 1.392, | |
| "grad_norm": 1.3346441971185985, | |
| "learning_rate": 3.6263980501133466e-05, | |
| "loss": 0.3478, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11153702437877655, | |
| "step": 870, | |
| "valid_targets_mean": 2711.6, | |
| "valid_targets_min": 656 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 0.4932114438780455, | |
| "learning_rate": 3.619870710512268e-05, | |
| "loss": 0.3879, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16475415229797363, | |
| "step": 875, | |
| "valid_targets_mean": 3808.2, | |
| "valid_targets_min": 573 | |
| }, | |
| { | |
| "epoch": 1.408, | |
| "grad_norm": 0.42202752832274765, | |
| "learning_rate": 3.6132928247305713e-05, | |
| "loss": 0.3583, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1160656213760376, | |
| "step": 880, | |
| "valid_targets_mean": 2949.6, | |
| "valid_targets_min": 959 | |
| }, | |
| { | |
| "epoch": 1.416, | |
| "grad_norm": 0.45638707283657404, | |
| "learning_rate": 3.60666459802353e-05, | |
| "loss": 0.4137, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1908377707004547, | |
| "step": 885, | |
| "valid_targets_mean": 4822.8, | |
| "valid_targets_min": 1414 | |
| }, | |
| { | |
| "epoch": 1.424, | |
| "grad_norm": 0.49655080228821374, | |
| "learning_rate": 3.599986237217245e-05, | |
| "loss": 0.3884, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14393456280231476, | |
| "step": 890, | |
| "valid_targets_mean": 3312.5, | |
| "valid_targets_min": 1288 | |
| }, | |
| { | |
| "epoch": 1.432, | |
| "grad_norm": 0.40994947703418805, | |
| "learning_rate": 3.593257950702194e-05, | |
| "loss": 0.3821, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19887655973434448, | |
| "step": 895, | |
| "valid_targets_mean": 5122.5, | |
| "valid_targets_min": 1689 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "grad_norm": 0.6100341519792659, | |
| "learning_rate": 3.586479948426728e-05, | |
| "loss": 0.4045, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29838448762893677, | |
| "step": 900, | |
| "valid_targets_mean": 3684.9, | |
| "valid_targets_min": 781 | |
| }, | |
| { | |
| "epoch": 1.448, | |
| "grad_norm": 0.40258620360925185, | |
| "learning_rate": 3.579652441890523e-05, | |
| "loss": 0.357, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15237580239772797, | |
| "step": 905, | |
| "valid_targets_mean": 4839.9, | |
| "valid_targets_min": 1123 | |
| }, | |
| { | |
| "epoch": 1.456, | |
| "grad_norm": 0.6002865400954618, | |
| "learning_rate": 3.572775644137974e-05, | |
| "loss": 0.3534, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14805619418621063, | |
| "step": 910, | |
| "valid_targets_mean": 1668.4, | |
| "valid_targets_min": 697 | |
| }, | |
| { | |
| "epoch": 1.464, | |
| "grad_norm": 0.34946528869800725, | |
| "learning_rate": 3.5658497697515534e-05, | |
| "loss": 0.3369, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1722603142261505, | |
| "step": 915, | |
| "valid_targets_mean": 6944.6, | |
| "valid_targets_min": 1284 | |
| }, | |
| { | |
| "epoch": 1.472, | |
| "grad_norm": 0.5550974904476521, | |
| "learning_rate": 3.558875034845113e-05, | |
| "loss": 0.3414, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1764376014471054, | |
| "step": 920, | |
| "valid_targets_mean": 5085.6, | |
| "valid_targets_min": 1302 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "grad_norm": 0.40211624001194296, | |
| "learning_rate": 3.551851657057139e-05, | |
| "loss": 0.3507, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18077786266803741, | |
| "step": 925, | |
| "valid_targets_mean": 6009.4, | |
| "valid_targets_min": 853 | |
| }, | |
| { | |
| "epoch": 1.488, | |
| "grad_norm": 0.42842599212123683, | |
| "learning_rate": 3.544779855543963e-05, | |
| "loss": 0.3435, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21971535682678223, | |
| "step": 930, | |
| "valid_targets_mean": 5389.8, | |
| "valid_targets_min": 1860 | |
| }, | |
| { | |
| "epoch": 1.496, | |
| "grad_norm": 0.5412809944985588, | |
| "learning_rate": 3.5376598509729226e-05, | |
| "loss": 0.3777, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20583224296569824, | |
| "step": 935, | |
| "valid_targets_mean": 3356.8, | |
| "valid_targets_min": 1087 | |
| }, | |
| { | |
| "epoch": 1.504, | |
| "grad_norm": 0.4090830832550061, | |
| "learning_rate": 3.5304918655154754e-05, | |
| "loss": 0.3964, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21631677448749542, | |
| "step": 940, | |
| "valid_targets_mean": 5357.0, | |
| "valid_targets_min": 1077 | |
| }, | |
| { | |
| "epoch": 1.512, | |
| "grad_norm": 0.6620976500410706, | |
| "learning_rate": 3.523276122840266e-05, | |
| "loss": 0.3548, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2258632481098175, | |
| "step": 945, | |
| "valid_targets_mean": 2185.2, | |
| "valid_targets_min": 671 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 0.4976296422194412, | |
| "learning_rate": 3.516012848106149e-05, | |
| "loss": 0.3499, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18777544796466827, | |
| "step": 950, | |
| "valid_targets_mean": 3989.5, | |
| "valid_targets_min": 1180 | |
| }, | |
| { | |
| "epoch": 1.528, | |
| "grad_norm": 0.4230732174600183, | |
| "learning_rate": 3.5087022679551614e-05, | |
| "loss": 0.3575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19989222288131714, | |
| "step": 955, | |
| "valid_targets_mean": 4162.1, | |
| "valid_targets_min": 1657 | |
| }, | |
| { | |
| "epoch": 1.536, | |
| "grad_norm": 0.5275885905547862, | |
| "learning_rate": 3.5013446105054486e-05, | |
| "loss": 0.356, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17071036994457245, | |
| "step": 960, | |
| "valid_targets_mean": 2735.5, | |
| "valid_targets_min": 907 | |
| }, | |
| { | |
| "epoch": 1.544, | |
| "grad_norm": 0.46078220045065427, | |
| "learning_rate": 3.493940105344152e-05, | |
| "loss": 0.3706, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1477717161178589, | |
| "step": 965, | |
| "valid_targets_mean": 2956.9, | |
| "valid_targets_min": 947 | |
| }, | |
| { | |
| "epoch": 1.552, | |
| "grad_norm": 0.3995299747785965, | |
| "learning_rate": 3.4864889835202366e-05, | |
| "loss": 0.3534, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18769648671150208, | |
| "step": 970, | |
| "valid_targets_mean": 5725.6, | |
| "valid_targets_min": 1999 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 0.64944906147947, | |
| "learning_rate": 3.4789914775372905e-05, | |
| "loss": 0.3862, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24089542031288147, | |
| "step": 975, | |
| "valid_targets_mean": 2843.6, | |
| "valid_targets_min": 869 | |
| }, | |
| { | |
| "epoch": 1.568, | |
| "grad_norm": 0.5425019910583312, | |
| "learning_rate": 3.471447821346264e-05, | |
| "loss": 0.3922, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15392085909843445, | |
| "step": 980, | |
| "valid_targets_mean": 2680.6, | |
| "valid_targets_min": 908 | |
| }, | |
| { | |
| "epoch": 1.576, | |
| "grad_norm": 0.5368209449228799, | |
| "learning_rate": 3.463858250338168e-05, | |
| "loss": 0.396, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1622292697429657, | |
| "step": 985, | |
| "valid_targets_mean": 2758.0, | |
| "valid_targets_min": 869 | |
| }, | |
| { | |
| "epoch": 1.584, | |
| "grad_norm": 0.43598303845384095, | |
| "learning_rate": 3.4562230013367374e-05, | |
| "loss": 0.4045, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21002206206321716, | |
| "step": 990, | |
| "valid_targets_mean": 4658.1, | |
| "valid_targets_min": 1457 | |
| }, | |
| { | |
| "epoch": 1.592, | |
| "grad_norm": 0.3904901071899063, | |
| "learning_rate": 3.448542312591032e-05, | |
| "loss": 0.37, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20996427536010742, | |
| "step": 995, | |
| "valid_targets_mean": 6088.1, | |
| "valid_targets_min": 688 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 0.41487996792002385, | |
| "learning_rate": 3.440816423768007e-05, | |
| "loss": 0.3465, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2232140749692917, | |
| "step": 1000, | |
| "valid_targets_mean": 6249.6, | |
| "valid_targets_min": 790 | |
| }, | |
| { | |
| "epoch": 1.608, | |
| "grad_norm": 0.41215706864500073, | |
| "learning_rate": 3.433045575945031e-05, | |
| "loss": 0.3747, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1780746430158615, | |
| "step": 1005, | |
| "valid_targets_mean": 6710.1, | |
| "valid_targets_min": 720 | |
| }, | |
| { | |
| "epoch": 1.616, | |
| "grad_norm": 0.5427836394723989, | |
| "learning_rate": 3.42523001160237e-05, | |
| "loss": 0.4069, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22656163573265076, | |
| "step": 1010, | |
| "valid_targets_mean": 3230.6, | |
| "valid_targets_min": 1009 | |
| }, | |
| { | |
| "epoch": 1.624, | |
| "grad_norm": 0.4790601799537503, | |
| "learning_rate": 3.417369974615615e-05, | |
| "loss": 0.3731, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1649462878704071, | |
| "step": 1015, | |
| "valid_targets_mean": 4221.9, | |
| "valid_targets_min": 1007 | |
| }, | |
| { | |
| "epoch": 1.6320000000000001, | |
| "grad_norm": 0.4765491643369093, | |
| "learning_rate": 3.409465710248074e-05, | |
| "loss": 0.3515, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14106625318527222, | |
| "step": 1020, | |
| "valid_targets_mean": 2703.8, | |
| "valid_targets_min": 1020 | |
| }, | |
| { | |
| "epoch": 1.6400000000000001, | |
| "grad_norm": 0.49781135493555667, | |
| "learning_rate": 3.401517465143119e-05, | |
| "loss": 0.3895, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21954402327537537, | |
| "step": 1025, | |
| "valid_targets_mean": 4009.2, | |
| "valid_targets_min": 1460 | |
| }, | |
| { | |
| "epoch": 1.6480000000000001, | |
| "grad_norm": 0.45112165406620686, | |
| "learning_rate": 3.393525487316489e-05, | |
| "loss": 0.3614, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18274998664855957, | |
| "step": 1030, | |
| "valid_targets_mean": 4840.9, | |
| "valid_targets_min": 1708 | |
| }, | |
| { | |
| "epoch": 1.6560000000000001, | |
| "grad_norm": 0.4693212042872953, | |
| "learning_rate": 3.385490026148554e-05, | |
| "loss": 0.4153, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1388929784297943, | |
| "step": 1035, | |
| "valid_targets_mean": 3562.4, | |
| "valid_targets_min": 625 | |
| }, | |
| { | |
| "epoch": 1.6640000000000001, | |
| "grad_norm": 0.37397480223788876, | |
| "learning_rate": 3.377411332376529e-05, | |
| "loss": 0.3642, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2300184965133667, | |
| "step": 1040, | |
| "valid_targets_mean": 7929.9, | |
| "valid_targets_min": 994 | |
| }, | |
| { | |
| "epoch": 1.6720000000000002, | |
| "grad_norm": 0.3841145063100303, | |
| "learning_rate": 3.369289658086651e-05, | |
| "loss": 0.3544, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13292424380779266, | |
| "step": 1045, | |
| "valid_targets_mean": 3923.2, | |
| "valid_targets_min": 659 | |
| }, | |
| { | |
| "epoch": 1.6800000000000002, | |
| "grad_norm": 0.37819794098936316, | |
| "learning_rate": 3.3611252567063184e-05, | |
| "loss": 0.36, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19257867336273193, | |
| "step": 1050, | |
| "valid_targets_mean": 7234.4, | |
| "valid_targets_min": 1669 | |
| }, | |
| { | |
| "epoch": 1.688, | |
| "grad_norm": 0.4307477074202791, | |
| "learning_rate": 3.352918382996174e-05, | |
| "loss": 0.354, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19784092903137207, | |
| "step": 1055, | |
| "valid_targets_mean": 4706.8, | |
| "valid_targets_min": 1256 | |
| }, | |
| { | |
| "epoch": 1.696, | |
| "grad_norm": 0.47999337411149967, | |
| "learning_rate": 3.344669293042163e-05, | |
| "loss": 0.3863, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19967739284038544, | |
| "step": 1060, | |
| "valid_targets_mean": 3586.5, | |
| "valid_targets_min": 1024 | |
| }, | |
| { | |
| "epoch": 1.704, | |
| "grad_norm": 0.4958715076592039, | |
| "learning_rate": 3.336378244247539e-05, | |
| "loss": 0.3851, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25138044357299805, | |
| "step": 1065, | |
| "valid_targets_mean": 4725.0, | |
| "valid_targets_min": 877 | |
| }, | |
| { | |
| "epoch": 1.712, | |
| "grad_norm": 0.44982531036880435, | |
| "learning_rate": 3.3280454953248326e-05, | |
| "loss": 0.3318, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18653461337089539, | |
| "step": 1070, | |
| "valid_targets_mean": 4260.8, | |
| "valid_targets_min": 1106 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "grad_norm": 0.4523598014038419, | |
| "learning_rate": 3.3196713062877765e-05, | |
| "loss": 0.3524, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14612746238708496, | |
| "step": 1075, | |
| "valid_targets_mean": 4109.9, | |
| "valid_targets_min": 896 | |
| }, | |
| { | |
| "epoch": 1.728, | |
| "grad_norm": 0.40758165227767357, | |
| "learning_rate": 3.311255938443196e-05, | |
| "loss": 0.3723, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13389909267425537, | |
| "step": 1080, | |
| "valid_targets_mean": 4376.0, | |
| "valid_targets_min": 950 | |
| }, | |
| { | |
| "epoch": 1.736, | |
| "grad_norm": 0.5491124893709626, | |
| "learning_rate": 3.3027996543828524e-05, | |
| "loss": 0.3695, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18468721210956573, | |
| "step": 1085, | |
| "valid_targets_mean": 2868.6, | |
| "valid_targets_min": 849 | |
| }, | |
| { | |
| "epoch": 1.744, | |
| "grad_norm": 0.3993174672092302, | |
| "learning_rate": 3.2943027179752494e-05, | |
| "loss": 0.3416, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20559825003147125, | |
| "step": 1090, | |
| "valid_targets_mean": 6660.8, | |
| "valid_targets_min": 760 | |
| }, | |
| { | |
| "epoch": 1.752, | |
| "grad_norm": 0.40380661411621366, | |
| "learning_rate": 3.285765394357401e-05, | |
| "loss": 0.3312, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15859174728393555, | |
| "step": 1095, | |
| "valid_targets_mean": 4548.8, | |
| "valid_targets_min": 829 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 0.5382918290555533, | |
| "learning_rate": 3.277187949926556e-05, | |
| "loss": 0.3523, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21806976199150085, | |
| "step": 1100, | |
| "valid_targets_mean": 3312.4, | |
| "valid_targets_min": 1025 | |
| }, | |
| { | |
| "epoch": 1.768, | |
| "grad_norm": 0.4145669760422594, | |
| "learning_rate": 3.268570652331888e-05, | |
| "loss": 0.3984, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1447543501853943, | |
| "step": 1105, | |
| "valid_targets_mean": 3821.0, | |
| "valid_targets_min": 722 | |
| }, | |
| { | |
| "epoch": 1.776, | |
| "grad_norm": 0.6163574474026122, | |
| "learning_rate": 3.2599137704661405e-05, | |
| "loss": 0.3596, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21196293830871582, | |
| "step": 1110, | |
| "valid_targets_mean": 2316.6, | |
| "valid_targets_min": 586 | |
| }, | |
| { | |
| "epoch": 1.784, | |
| "grad_norm": 0.47460067439094217, | |
| "learning_rate": 3.251217574457239e-05, | |
| "loss": 0.3742, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2540559768676758, | |
| "step": 1115, | |
| "valid_targets_mean": 4955.0, | |
| "valid_targets_min": 1701 | |
| }, | |
| { | |
| "epoch": 1.792, | |
| "grad_norm": 0.4624184685618981, | |
| "learning_rate": 3.242482335659861e-05, | |
| "loss": 0.3834, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18782807886600494, | |
| "step": 1120, | |
| "valid_targets_mean": 4641.6, | |
| "valid_targets_min": 785 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 0.49672506088763413, | |
| "learning_rate": 3.2337083266469687e-05, | |
| "loss": 0.3983, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24676811695098877, | |
| "step": 1125, | |
| "valid_targets_mean": 4641.5, | |
| "valid_targets_min": 803 | |
| }, | |
| { | |
| "epoch": 1.808, | |
| "grad_norm": 0.4252619396562025, | |
| "learning_rate": 3.224895821201304e-05, | |
| "loss": 0.3789, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2329331487417221, | |
| "step": 1130, | |
| "valid_targets_mean": 5197.2, | |
| "valid_targets_min": 760 | |
| }, | |
| { | |
| "epoch": 1.8159999999999998, | |
| "grad_norm": 0.4185531683126807, | |
| "learning_rate": 3.2160450943068446e-05, | |
| "loss": 0.3662, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16280978918075562, | |
| "step": 1135, | |
| "valid_targets_mean": 5254.2, | |
| "valid_targets_min": 2000 | |
| }, | |
| { | |
| "epoch": 1.8239999999999998, | |
| "grad_norm": 0.696452870548893, | |
| "learning_rate": 3.207156422140225e-05, | |
| "loss": 0.4045, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19130727648735046, | |
| "step": 1140, | |
| "valid_targets_mean": 1899.5, | |
| "valid_targets_min": 771 | |
| }, | |
| { | |
| "epoch": 1.8319999999999999, | |
| "grad_norm": 0.48431295913248046, | |
| "learning_rate": 3.198230082062115e-05, | |
| "loss": 0.3836, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2074839472770691, | |
| "step": 1145, | |
| "valid_targets_mean": 5449.5, | |
| "valid_targets_min": 852 | |
| }, | |
| { | |
| "epoch": 1.8399999999999999, | |
| "grad_norm": 0.4133281290019396, | |
| "learning_rate": 3.189266352608574e-05, | |
| "loss": 0.366, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1883474886417389, | |
| "step": 1150, | |
| "valid_targets_mean": 4908.2, | |
| "valid_targets_min": 1127 | |
| }, | |
| { | |
| "epoch": 1.8479999999999999, | |
| "grad_norm": 0.40434980197549936, | |
| "learning_rate": 3.180265513482345e-05, | |
| "loss": 0.3366, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15408740937709808, | |
| "step": 1155, | |
| "valid_targets_mean": 4307.4, | |
| "valid_targets_min": 892 | |
| }, | |
| { | |
| "epoch": 1.8559999999999999, | |
| "grad_norm": 0.44482757502979986, | |
| "learning_rate": 3.171227845544143e-05, | |
| "loss": 0.3676, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2276269793510437, | |
| "step": 1160, | |
| "valid_targets_mean": 4911.0, | |
| "valid_targets_min": 957 | |
| }, | |
| { | |
| "epoch": 1.8639999999999999, | |
| "grad_norm": 0.43600977689095316, | |
| "learning_rate": 3.162153630803877e-05, | |
| "loss": 0.3542, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15386472642421722, | |
| "step": 1165, | |
| "valid_targets_mean": 4215.6, | |
| "valid_targets_min": 1328 | |
| }, | |
| { | |
| "epoch": 1.8719999999999999, | |
| "grad_norm": 0.37429615582479875, | |
| "learning_rate": 3.153043152411861e-05, | |
| "loss": 0.3945, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1603434681892395, | |
| "step": 1170, | |
| "valid_targets_mean": 4956.5, | |
| "valid_targets_min": 1233 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 0.42018805399081294, | |
| "learning_rate": 3.14389669464997e-05, | |
| "loss": 0.367, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13551610708236694, | |
| "step": 1175, | |
| "valid_targets_mean": 3497.2, | |
| "valid_targets_min": 699 | |
| }, | |
| { | |
| "epoch": 1.888, | |
| "grad_norm": 0.4485191669176861, | |
| "learning_rate": 3.134714542922777e-05, | |
| "loss": 0.3694, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1404634714126587, | |
| "step": 1180, | |
| "valid_targets_mean": 3324.0, | |
| "valid_targets_min": 857 | |
| }, | |
| { | |
| "epoch": 1.896, | |
| "grad_norm": 0.5320471292171671, | |
| "learning_rate": 3.1254969837486425e-05, | |
| "loss": 0.3528, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22733406722545624, | |
| "step": 1185, | |
| "valid_targets_mean": 3494.6, | |
| "valid_targets_min": 1049 | |
| }, | |
| { | |
| "epoch": 1.904, | |
| "grad_norm": 0.4429069138743051, | |
| "learning_rate": 3.116244304750774e-05, | |
| "loss": 0.3441, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21461255848407745, | |
| "step": 1190, | |
| "valid_targets_mean": 5988.0, | |
| "valid_targets_min": 1106 | |
| }, | |
| { | |
| "epoch": 1.912, | |
| "grad_norm": 0.4909918363267137, | |
| "learning_rate": 3.106956794648254e-05, | |
| "loss": 0.3888, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3207101821899414, | |
| "step": 1195, | |
| "valid_targets_mean": 5188.9, | |
| "valid_targets_min": 1740 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 0.4380391893049689, | |
| "learning_rate": 3.097634743247026e-05, | |
| "loss": 0.3777, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20924848318099976, | |
| "step": 1200, | |
| "valid_targets_mean": 5444.6, | |
| "valid_targets_min": 859 | |
| }, | |
| { | |
| "epoch": 1.928, | |
| "grad_norm": 0.39395020883135107, | |
| "learning_rate": 3.08827844143086e-05, | |
| "loss": 0.3606, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1892511397600174, | |
| "step": 1205, | |
| "valid_targets_mean": 4884.9, | |
| "valid_targets_min": 1116 | |
| }, | |
| { | |
| "epoch": 1.936, | |
| "grad_norm": 0.4431767573115503, | |
| "learning_rate": 3.078888181152264e-05, | |
| "loss": 0.3794, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12274126708507538, | |
| "step": 1210, | |
| "valid_targets_mean": 2207.4, | |
| "valid_targets_min": 565 | |
| }, | |
| { | |
| "epoch": 1.944, | |
| "grad_norm": 0.4393189555489097, | |
| "learning_rate": 3.0694642554233855e-05, | |
| "loss": 0.3684, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1852644830942154, | |
| "step": 1215, | |
| "valid_targets_mean": 3580.2, | |
| "valid_targets_min": 847 | |
| }, | |
| { | |
| "epoch": 1.952, | |
| "grad_norm": 0.5175461857648035, | |
| "learning_rate": 3.0600069583068594e-05, | |
| "loss": 0.3874, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13556857407093048, | |
| "step": 1220, | |
| "valid_targets_mean": 2068.8, | |
| "valid_targets_min": 911 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 0.4344897474377549, | |
| "learning_rate": 3.0505165849066394e-05, | |
| "loss": 0.3416, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16121706366539001, | |
| "step": 1225, | |
| "valid_targets_mean": 4083.2, | |
| "valid_targets_min": 1160 | |
| }, | |
| { | |
| "epoch": 1.968, | |
| "grad_norm": 0.3998726023920137, | |
| "learning_rate": 3.040993431358782e-05, | |
| "loss": 0.3748, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20173384249210358, | |
| "step": 1230, | |
| "valid_targets_mean": 5485.5, | |
| "valid_targets_min": 795 | |
| }, | |
| { | |
| "epoch": 1.976, | |
| "grad_norm": 0.43530746416898686, | |
| "learning_rate": 3.031437794822215e-05, | |
| "loss": 0.3352, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1987071931362152, | |
| "step": 1235, | |
| "valid_targets_mean": 4561.8, | |
| "valid_targets_min": 737 | |
| }, | |
| { | |
| "epoch": 1.984, | |
| "grad_norm": 0.45516788943319536, | |
| "learning_rate": 3.021849973469455e-05, | |
| "loss": 0.3869, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18358221650123596, | |
| "step": 1240, | |
| "valid_targets_mean": 3207.0, | |
| "valid_targets_min": 998 | |
| }, | |
| { | |
| "epoch": 1.992, | |
| "grad_norm": 0.4576579120788942, | |
| "learning_rate": 3.012230266477313e-05, | |
| "loss": 0.3758, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11642518639564514, | |
| "step": 1245, | |
| "valid_targets_mean": 2846.4, | |
| "valid_targets_min": 645 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.41687844040354605, | |
| "learning_rate": 3.0025789740175502e-05, | |
| "loss": 0.3621, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22766928374767303, | |
| "step": 1250, | |
| "valid_targets_mean": 5502.0, | |
| "valid_targets_min": 804 | |
| }, | |
| { | |
| "epoch": 2.008, | |
| "grad_norm": 0.39445917384438633, | |
| "learning_rate": 2.9928963972475186e-05, | |
| "loss": 0.3294, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18963614106178284, | |
| "step": 1255, | |
| "valid_targets_mean": 6827.1, | |
| "valid_targets_min": 716 | |
| }, | |
| { | |
| "epoch": 2.016, | |
| "grad_norm": 0.47795453215109707, | |
| "learning_rate": 2.9831828383007585e-05, | |
| "loss": 0.3477, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16897451877593994, | |
| "step": 1260, | |
| "valid_targets_mean": 3771.0, | |
| "valid_targets_min": 1365 | |
| }, | |
| { | |
| "epoch": 2.024, | |
| "grad_norm": 0.40551985391746687, | |
| "learning_rate": 2.9734386002775754e-05, | |
| "loss": 0.3464, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11677554249763489, | |
| "step": 1265, | |
| "valid_targets_mean": 4102.6, | |
| "valid_targets_min": 621 | |
| }, | |
| { | |
| "epoch": 2.032, | |
| "grad_norm": 0.42377235398466956, | |
| "learning_rate": 2.963663987235577e-05, | |
| "loss": 0.3505, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1520041525363922, | |
| "step": 1270, | |
| "valid_targets_mean": 5001.9, | |
| "valid_targets_min": 721 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "grad_norm": 0.4806596558991801, | |
| "learning_rate": 2.95385930418019e-05, | |
| "loss": 0.3668, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18150800466537476, | |
| "step": 1275, | |
| "valid_targets_mean": 3979.8, | |
| "valid_targets_min": 734 | |
| }, | |
| { | |
| "epoch": 2.048, | |
| "grad_norm": 0.45923619311529357, | |
| "learning_rate": 2.9440248570551406e-05, | |
| "loss": 0.3577, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17516975104808807, | |
| "step": 1280, | |
| "valid_targets_mean": 4615.6, | |
| "valid_targets_min": 732 | |
| }, | |
| { | |
| "epoch": 2.056, | |
| "grad_norm": 0.43037695871723236, | |
| "learning_rate": 2.934160952732907e-05, | |
| "loss": 0.3065, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1621362715959549, | |
| "step": 1285, | |
| "valid_targets_mean": 4676.5, | |
| "valid_targets_min": 1619 | |
| }, | |
| { | |
| "epoch": 2.064, | |
| "grad_norm": 0.5278235222224164, | |
| "learning_rate": 2.9242678990051462e-05, | |
| "loss": 0.3165, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12950673699378967, | |
| "step": 1290, | |
| "valid_targets_mean": 3176.2, | |
| "valid_targets_min": 820 | |
| }, | |
| { | |
| "epoch": 2.072, | |
| "grad_norm": 0.591249312950412, | |
| "learning_rate": 2.9143460045730886e-05, | |
| "loss": 0.3165, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25125831365585327, | |
| "step": 1295, | |
| "valid_targets_mean": 4605.6, | |
| "valid_targets_min": 1051 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "grad_norm": 0.4633677094496606, | |
| "learning_rate": 2.9043955790379035e-05, | |
| "loss": 0.3412, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15374936163425446, | |
| "step": 1300, | |
| "valid_targets_mean": 3456.1, | |
| "valid_targets_min": 596 | |
| }, | |
| { | |
| "epoch": 2.088, | |
| "grad_norm": 0.6839077748868694, | |
| "learning_rate": 2.8944169328910427e-05, | |
| "loss": 0.3314, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21061047911643982, | |
| "step": 1305, | |
| "valid_targets_mean": 2743.4, | |
| "valid_targets_min": 736 | |
| }, | |
| { | |
| "epoch": 2.096, | |
| "grad_norm": 0.40010179262506984, | |
| "learning_rate": 2.884410377504547e-05, | |
| "loss": 0.3489, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20529218018054962, | |
| "step": 1310, | |
| "valid_targets_mean": 5945.4, | |
| "valid_targets_min": 1603 | |
| }, | |
| { | |
| "epoch": 2.104, | |
| "grad_norm": 0.46363378675791567, | |
| "learning_rate": 2.8743762251213333e-05, | |
| "loss": 0.3516, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13235880434513092, | |
| "step": 1315, | |
| "valid_targets_mean": 3557.6, | |
| "valid_targets_min": 909 | |
| }, | |
| { | |
| "epoch": 2.112, | |
| "grad_norm": 0.5226657341187503, | |
| "learning_rate": 2.8643147888454507e-05, | |
| "loss": 0.321, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17466039955615997, | |
| "step": 1320, | |
| "valid_targets_mean": 5333.5, | |
| "valid_targets_min": 948 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "grad_norm": 0.5688435709083185, | |
| "learning_rate": 2.854226382632312e-05, | |
| "loss": 0.3687, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19334635138511658, | |
| "step": 1325, | |
| "valid_targets_mean": 3006.2, | |
| "valid_targets_min": 607 | |
| }, | |
| { | |
| "epoch": 2.128, | |
| "grad_norm": 0.3938353360765779, | |
| "learning_rate": 2.844111321278893e-05, | |
| "loss": 0.3356, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18020674586296082, | |
| "step": 1330, | |
| "valid_targets_mean": 7703.2, | |
| "valid_targets_min": 2069 | |
| }, | |
| { | |
| "epoch": 2.136, | |
| "grad_norm": 0.6166572236379344, | |
| "learning_rate": 2.833969920413913e-05, | |
| "loss": 0.3654, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15063633024692535, | |
| "step": 1335, | |
| "valid_targets_mean": 2484.9, | |
| "valid_targets_min": 588 | |
| }, | |
| { | |
| "epoch": 2.144, | |
| "grad_norm": 0.4594292950880163, | |
| "learning_rate": 2.8238024964879857e-05, | |
| "loss": 0.3588, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1551552265882492, | |
| "step": 1340, | |
| "valid_targets_mean": 4537.5, | |
| "valid_targets_min": 1203 | |
| }, | |
| { | |
| "epoch": 2.152, | |
| "grad_norm": 0.4759453483875312, | |
| "learning_rate": 2.8136093667637438e-05, | |
| "loss": 0.3608, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19271422922611237, | |
| "step": 1345, | |
| "valid_targets_mean": 5253.5, | |
| "valid_targets_min": 634 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "grad_norm": 0.4274589956773415, | |
| "learning_rate": 2.8033908493059394e-05, | |
| "loss": 0.3447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26053884625434875, | |
| "step": 1350, | |
| "valid_targets_mean": 7490.9, | |
| "valid_targets_min": 696 | |
| }, | |
| { | |
| "epoch": 2.168, | |
| "grad_norm": 0.48920586887692913, | |
| "learning_rate": 2.793147262971519e-05, | |
| "loss": 0.3384, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20303943753242493, | |
| "step": 1355, | |
| "valid_targets_mean": 3912.5, | |
| "valid_targets_min": 1093 | |
| }, | |
| { | |
| "epoch": 2.176, | |
| "grad_norm": 0.3805101077019346, | |
| "learning_rate": 2.7828789273996748e-05, | |
| "loss": 0.3513, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1607498824596405, | |
| "step": 1360, | |
| "valid_targets_mean": 7038.1, | |
| "valid_targets_min": 950 | |
| }, | |
| { | |
| "epoch": 2.184, | |
| "grad_norm": 0.4436216784104909, | |
| "learning_rate": 2.7725861630018703e-05, | |
| "loss": 0.3757, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24152888357639313, | |
| "step": 1365, | |
| "valid_targets_mean": 6961.2, | |
| "valid_targets_min": 2951 | |
| }, | |
| { | |
| "epoch": 2.192, | |
| "grad_norm": 0.49361804115715835, | |
| "learning_rate": 2.7622692909518423e-05, | |
| "loss": 0.3367, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1655840426683426, | |
| "step": 1370, | |
| "valid_targets_mean": 3099.6, | |
| "valid_targets_min": 776 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "grad_norm": 0.3908270372770131, | |
| "learning_rate": 2.7519286331755766e-05, | |
| "loss": 0.3292, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20059353113174438, | |
| "step": 1375, | |
| "valid_targets_mean": 6875.1, | |
| "valid_targets_min": 863 | |
| }, | |
| { | |
| "epoch": 2.208, | |
| "grad_norm": 0.3381814363001518, | |
| "learning_rate": 2.7415645123412672e-05, | |
| "loss": 0.3038, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14437264204025269, | |
| "step": 1380, | |
| "valid_targets_mean": 5681.9, | |
| "valid_targets_min": 1259 | |
| }, | |
| { | |
| "epoch": 2.216, | |
| "grad_norm": 0.4335970734003149, | |
| "learning_rate": 2.731177251849246e-05, | |
| "loss": 0.3905, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12784774601459503, | |
| "step": 1385, | |
| "valid_targets_mean": 3582.9, | |
| "valid_targets_min": 1328 | |
| }, | |
| { | |
| "epoch": 2.224, | |
| "grad_norm": 0.5013659443395456, | |
| "learning_rate": 2.7207671758218884e-05, | |
| "loss": 0.324, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20887424051761627, | |
| "step": 1390, | |
| "valid_targets_mean": 3934.0, | |
| "valid_targets_min": 1019 | |
| }, | |
| { | |
| "epoch": 2.232, | |
| "grad_norm": 0.5451971071443736, | |
| "learning_rate": 2.710334609093504e-05, | |
| "loss": 0.3229, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12654241919517517, | |
| "step": 1395, | |
| "valid_targets_mean": 2552.6, | |
| "valid_targets_min": 1080 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "grad_norm": 0.5275185534002302, | |
| "learning_rate": 2.699879877200198e-05, | |
| "loss": 0.3476, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17860426008701324, | |
| "step": 1400, | |
| "valid_targets_mean": 4925.5, | |
| "valid_targets_min": 1138 | |
| }, | |
| { | |
| "epoch": 2.248, | |
| "grad_norm": 0.4750860893446014, | |
| "learning_rate": 2.6894033063697143e-05, | |
| "loss": 0.3167, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16186845302581787, | |
| "step": 1405, | |
| "valid_targets_mean": 3735.0, | |
| "valid_targets_min": 484 | |
| }, | |
| { | |
| "epoch": 2.2560000000000002, | |
| "grad_norm": 0.5373568486267774, | |
| "learning_rate": 2.6789052235112554e-05, | |
| "loss": 0.3537, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22873114049434662, | |
| "step": 1410, | |
| "valid_targets_mean": 4564.0, | |
| "valid_targets_min": 632 | |
| }, | |
| { | |
| "epoch": 2.2640000000000002, | |
| "grad_norm": 0.5788908168658097, | |
| "learning_rate": 2.66838595620528e-05, | |
| "loss": 0.3401, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2733263075351715, | |
| "step": 1415, | |
| "valid_targets_mean": 4478.9, | |
| "valid_targets_min": 673 | |
| }, | |
| { | |
| "epoch": 2.2720000000000002, | |
| "grad_norm": 0.586889266057967, | |
| "learning_rate": 2.6578458326932842e-05, | |
| "loss": 0.3683, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17043578624725342, | |
| "step": 1420, | |
| "valid_targets_mean": 2696.2, | |
| "valid_targets_min": 811 | |
| }, | |
| { | |
| "epoch": 2.2800000000000002, | |
| "grad_norm": 0.493980927370087, | |
| "learning_rate": 2.6472851818675583e-05, | |
| "loss": 0.3391, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2201269567012787, | |
| "step": 1425, | |
| "valid_targets_mean": 4983.6, | |
| "valid_targets_min": 810 | |
| }, | |
| { | |
| "epoch": 2.288, | |
| "grad_norm": 0.3286261942090746, | |
| "learning_rate": 2.6367043332609223e-05, | |
| "loss": 0.3392, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13666635751724243, | |
| "step": 1430, | |
| "valid_targets_mean": 7580.5, | |
| "valid_targets_min": 1536 | |
| }, | |
| { | |
| "epoch": 2.296, | |
| "grad_norm": 0.4699948466545275, | |
| "learning_rate": 2.6261036170364448e-05, | |
| "loss": 0.3476, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1846490055322647, | |
| "step": 1435, | |
| "valid_targets_mean": 3870.8, | |
| "valid_targets_min": 660 | |
| }, | |
| { | |
| "epoch": 2.304, | |
| "grad_norm": 0.6157253523596955, | |
| "learning_rate": 2.6154833639771415e-05, | |
| "loss": 0.362, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22415241599082947, | |
| "step": 1440, | |
| "valid_targets_mean": 2897.1, | |
| "valid_targets_min": 1086 | |
| }, | |
| { | |
| "epoch": 2.312, | |
| "grad_norm": 0.45969123667163286, | |
| "learning_rate": 2.6048439054756492e-05, | |
| "loss": 0.3217, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17089970409870148, | |
| "step": 1445, | |
| "valid_targets_mean": 4007.8, | |
| "valid_targets_min": 905 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "grad_norm": 0.4423705287499431, | |
| "learning_rate": 2.594185573523892e-05, | |
| "loss": 0.3455, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1676362156867981, | |
| "step": 1450, | |
| "valid_targets_mean": 8078.1, | |
| "valid_targets_min": 969 | |
| }, | |
| { | |
| "epoch": 2.328, | |
| "grad_norm": 0.5054530871166943, | |
| "learning_rate": 2.583508700702716e-05, | |
| "loss": 0.3509, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13996833562850952, | |
| "step": 1455, | |
| "valid_targets_mean": 3282.5, | |
| "valid_targets_min": 864 | |
| }, | |
| { | |
| "epoch": 2.336, | |
| "grad_norm": 0.4468904131293525, | |
| "learning_rate": 2.572813620171513e-05, | |
| "loss": 0.3255, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1576099395751953, | |
| "step": 1460, | |
| "valid_targets_mean": 4571.8, | |
| "valid_targets_min": 1272 | |
| }, | |
| { | |
| "epoch": 2.344, | |
| "grad_norm": 0.4624486850943764, | |
| "learning_rate": 2.5621006656578267e-05, | |
| "loss": 0.3153, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15907934308052063, | |
| "step": 1465, | |
| "valid_targets_mean": 4421.0, | |
| "valid_targets_min": 982 | |
| }, | |
| { | |
| "epoch": 2.352, | |
| "grad_norm": 0.4307528801295405, | |
| "learning_rate": 2.5513701714469373e-05, | |
| "loss": 0.3714, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1317245364189148, | |
| "step": 1470, | |
| "valid_targets_mean": 3624.6, | |
| "valid_targets_min": 854 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "grad_norm": 0.5446119589065747, | |
| "learning_rate": 2.540622472371429e-05, | |
| "loss": 0.3409, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1581343561410904, | |
| "step": 1475, | |
| "valid_targets_mean": 3219.8, | |
| "valid_targets_min": 535 | |
| }, | |
| { | |
| "epoch": 2.368, | |
| "grad_norm": 0.7703262680045895, | |
| "learning_rate": 2.5298579038007478e-05, | |
| "loss": 0.351, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18610429763793945, | |
| "step": 1480, | |
| "valid_targets_mean": 3620.9, | |
| "valid_targets_min": 1339 | |
| }, | |
| { | |
| "epoch": 2.376, | |
| "grad_norm": 0.4079087647088371, | |
| "learning_rate": 2.519076801630727e-05, | |
| "loss": 0.3062, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14258792996406555, | |
| "step": 1485, | |
| "valid_targets_mean": 5352.1, | |
| "valid_targets_min": 1055 | |
| }, | |
| { | |
| "epoch": 2.384, | |
| "grad_norm": 0.514685400749292, | |
| "learning_rate": 2.508279502273117e-05, | |
| "loss": 0.335, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20326395332813263, | |
| "step": 1490, | |
| "valid_targets_mean": 4564.5, | |
| "valid_targets_min": 974 | |
| }, | |
| { | |
| "epoch": 2.392, | |
| "grad_norm": 0.5844991616700916, | |
| "learning_rate": 2.4974663426450798e-05, | |
| "loss": 0.3599, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2439168244600296, | |
| "step": 1495, | |
| "valid_targets_mean": 3669.2, | |
| "valid_targets_min": 1138 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 0.4534268106459526, | |
| "learning_rate": 2.4866376601586798e-05, | |
| "loss": 0.3367, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14468619227409363, | |
| "step": 1500, | |
| "valid_targets_mean": 4236.1, | |
| "valid_targets_min": 337 | |
| }, | |
| { | |
| "epoch": 2.408, | |
| "grad_norm": 0.37149672658326494, | |
| "learning_rate": 2.475793792710352e-05, | |
| "loss": 0.3079, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15612857043743134, | |
| "step": 1505, | |
| "valid_targets_mean": 5706.2, | |
| "valid_targets_min": 1033 | |
| }, | |
| { | |
| "epoch": 2.416, | |
| "grad_norm": 0.431204790605149, | |
| "learning_rate": 2.4649350786703637e-05, | |
| "loss": 0.3472, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15120291709899902, | |
| "step": 1510, | |
| "valid_targets_mean": 3504.2, | |
| "valid_targets_min": 537 | |
| }, | |
| { | |
| "epoch": 2.424, | |
| "grad_norm": 0.38058066092756215, | |
| "learning_rate": 2.45406185687225e-05, | |
| "loss": 0.3334, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2249833345413208, | |
| "step": 1515, | |
| "valid_targets_mean": 7466.4, | |
| "valid_targets_min": 877 | |
| }, | |
| { | |
| "epoch": 2.432, | |
| "grad_norm": 0.4958529733126483, | |
| "learning_rate": 2.443174466602246e-05, | |
| "loss": 0.3362, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18912896513938904, | |
| "step": 1520, | |
| "valid_targets_mean": 3740.2, | |
| "valid_targets_min": 1103 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "grad_norm": 0.44552157761164163, | |
| "learning_rate": 2.4322732475886953e-05, | |
| "loss": 0.3425, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.196616530418396, | |
| "step": 1525, | |
| "valid_targets_mean": 5088.4, | |
| "valid_targets_min": 1383 | |
| }, | |
| { | |
| "epoch": 2.448, | |
| "grad_norm": 0.5732608370374113, | |
| "learning_rate": 2.4213585399914528e-05, | |
| "loss": 0.3386, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.261408269405365, | |
| "step": 1530, | |
| "valid_targets_mean": 5127.4, | |
| "valid_targets_min": 914 | |
| }, | |
| { | |
| "epoch": 2.456, | |
| "grad_norm": 0.4646397657685872, | |
| "learning_rate": 2.4104306843912687e-05, | |
| "loss": 0.3481, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19430388510227203, | |
| "step": 1535, | |
| "valid_targets_mean": 4647.1, | |
| "valid_targets_min": 841 | |
| }, | |
| { | |
| "epoch": 2.464, | |
| "grad_norm": 0.44219115003302095, | |
| "learning_rate": 2.3994900217791615e-05, | |
| "loss": 0.3248, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14268314838409424, | |
| "step": 1540, | |
| "valid_targets_mean": 3976.5, | |
| "valid_targets_min": 697 | |
| }, | |
| { | |
| "epoch": 2.472, | |
| "grad_norm": 0.4933434455561328, | |
| "learning_rate": 2.3885368935457762e-05, | |
| "loss": 0.3596, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16441547870635986, | |
| "step": 1545, | |
| "valid_targets_mean": 3575.2, | |
| "valid_targets_min": 823 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "grad_norm": 0.4326667267917598, | |
| "learning_rate": 2.3775716414707355e-05, | |
| "loss": 0.328, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1232375055551529, | |
| "step": 1550, | |
| "valid_targets_mean": 3318.1, | |
| "valid_targets_min": 1079 | |
| }, | |
| { | |
| "epoch": 2.488, | |
| "grad_norm": 0.429583180648258, | |
| "learning_rate": 2.36659460771197e-05, | |
| "loss": 0.3812, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07942848652601242, | |
| "step": 1555, | |
| "valid_targets_mean": 2053.8, | |
| "valid_targets_min": 653 | |
| }, | |
| { | |
| "epoch": 2.496, | |
| "grad_norm": 0.5473918418436651, | |
| "learning_rate": 2.3556061347950455e-05, | |
| "loss": 0.3418, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20061814785003662, | |
| "step": 1560, | |
| "valid_targets_mean": 3775.9, | |
| "valid_targets_min": 1127 | |
| }, | |
| { | |
| "epoch": 2.504, | |
| "grad_norm": 0.6024361371215083, | |
| "learning_rate": 2.3446065656024734e-05, | |
| "loss": 0.3522, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21523049473762512, | |
| "step": 1565, | |
| "valid_targets_mean": 2751.6, | |
| "valid_targets_min": 708 | |
| }, | |
| { | |
| "epoch": 2.512, | |
| "grad_norm": 0.5006491647087972, | |
| "learning_rate": 2.33359624336301e-05, | |
| "loss": 0.3123, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09982918202877045, | |
| "step": 1570, | |
| "valid_targets_mean": 1937.8, | |
| "valid_targets_min": 1018 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "grad_norm": 0.36371194764317616, | |
| "learning_rate": 2.3225755116409497e-05, | |
| "loss": 0.3093, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17913754284381866, | |
| "step": 1575, | |
| "valid_targets_mean": 7189.9, | |
| "valid_targets_min": 686 | |
| }, | |
| { | |
| "epoch": 2.528, | |
| "grad_norm": 0.5048673985779927, | |
| "learning_rate": 2.311544714325403e-05, | |
| "loss": 0.3441, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1666538417339325, | |
| "step": 1580, | |
| "valid_targets_mean": 3370.5, | |
| "valid_targets_min": 819 | |
| }, | |
| { | |
| "epoch": 2.536, | |
| "grad_norm": 0.5182580499328722, | |
| "learning_rate": 2.300504195619563e-05, | |
| "loss": 0.3615, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2155158817768097, | |
| "step": 1585, | |
| "valid_targets_mean": 4026.4, | |
| "valid_targets_min": 695 | |
| }, | |
| { | |
| "epoch": 2.544, | |
| "grad_norm": 0.468514133638622, | |
| "learning_rate": 2.2894543000299697e-05, | |
| "loss": 0.3323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24576425552368164, | |
| "step": 1590, | |
| "valid_targets_mean": 5894.2, | |
| "valid_targets_min": 942 | |
| }, | |
| { | |
| "epoch": 2.552, | |
| "grad_norm": 0.4884711618253095, | |
| "learning_rate": 2.2783953723557572e-05, | |
| "loss": 0.342, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16744929552078247, | |
| "step": 1595, | |
| "valid_targets_mean": 3803.2, | |
| "valid_targets_min": 927 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "grad_norm": 0.5999751684945661, | |
| "learning_rate": 2.2673277576778946e-05, | |
| "loss": 0.3547, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21919560432434082, | |
| "step": 1600, | |
| "valid_targets_mean": 2878.8, | |
| "valid_targets_min": 994 | |
| }, | |
| { | |
| "epoch": 2.568, | |
| "grad_norm": 0.5423569314618256, | |
| "learning_rate": 2.2562518013484208e-05, | |
| "loss": 0.3769, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24968698620796204, | |
| "step": 1605, | |
| "valid_targets_mean": 3738.9, | |
| "valid_targets_min": 887 | |
| }, | |
| { | |
| "epoch": 2.576, | |
| "grad_norm": 0.5562360071258139, | |
| "learning_rate": 2.245167848979664e-05, | |
| "loss": 0.3489, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18698713183403015, | |
| "step": 1610, | |
| "valid_targets_mean": 2892.8, | |
| "valid_targets_min": 1438 | |
| }, | |
| { | |
| "epoch": 2.584, | |
| "grad_norm": 0.5326819387179283, | |
| "learning_rate": 2.23407624643346e-05, | |
| "loss": 0.3371, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16616696119308472, | |
| "step": 1615, | |
| "valid_targets_mean": 3816.0, | |
| "valid_targets_min": 906 | |
| }, | |
| { | |
| "epoch": 2.592, | |
| "grad_norm": 0.49126894683922456, | |
| "learning_rate": 2.2229773398103606e-05, | |
| "loss": 0.3507, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16989147663116455, | |
| "step": 1620, | |
| "valid_targets_mean": 3596.0, | |
| "valid_targets_min": 937 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 0.41955570335468, | |
| "learning_rate": 2.2118714754388323e-05, | |
| "loss": 0.3514, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17922857403755188, | |
| "step": 1625, | |
| "valid_targets_mean": 5167.9, | |
| "valid_targets_min": 1261 | |
| }, | |
| { | |
| "epoch": 2.608, | |
| "grad_norm": 0.4466338468560662, | |
| "learning_rate": 2.200758999864449e-05, | |
| "loss": 0.3404, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23674200475215912, | |
| "step": 1630, | |
| "valid_targets_mean": 6806.6, | |
| "valid_targets_min": 1415 | |
| }, | |
| { | |
| "epoch": 2.616, | |
| "grad_norm": 0.42959952004699986, | |
| "learning_rate": 2.1896402598390818e-05, | |
| "loss": 0.3535, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18198445439338684, | |
| "step": 1635, | |
| "valid_targets_mean": 4071.1, | |
| "valid_targets_min": 948 | |
| }, | |
| { | |
| "epoch": 2.624, | |
| "grad_norm": 0.6276829545612954, | |
| "learning_rate": 2.178515602310074e-05, | |
| "loss": 0.3686, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3186021149158478, | |
| "step": 1640, | |
| "valid_targets_mean": 4211.4, | |
| "valid_targets_min": 1217 | |
| }, | |
| { | |
| "epoch": 2.632, | |
| "grad_norm": 0.4505510976461976, | |
| "learning_rate": 2.1673853744094193e-05, | |
| "loss": 0.3974, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29543089866638184, | |
| "step": 1645, | |
| "valid_targets_mean": 7278.0, | |
| "valid_targets_min": 1524 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "grad_norm": 0.37719949029802746, | |
| "learning_rate": 2.1562499234429283e-05, | |
| "loss": 0.3246, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15694212913513184, | |
| "step": 1650, | |
| "valid_targets_mean": 6930.1, | |
| "valid_targets_min": 1011 | |
| }, | |
| { | |
| "epoch": 2.648, | |
| "grad_norm": 0.46205859204161104, | |
| "learning_rate": 2.1451095968793908e-05, | |
| "loss": 0.339, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.149054616689682, | |
| "step": 1655, | |
| "valid_targets_mean": 3161.5, | |
| "valid_targets_min": 965 | |
| }, | |
| { | |
| "epoch": 2.656, | |
| "grad_norm": 0.5109521277662005, | |
| "learning_rate": 2.1339647423397337e-05, | |
| "loss": 0.3685, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1951621025800705, | |
| "step": 1660, | |
| "valid_targets_mean": 4541.2, | |
| "valid_targets_min": 775 | |
| }, | |
| { | |
| "epoch": 2.664, | |
| "grad_norm": 0.5496498798895945, | |
| "learning_rate": 2.122815707586176e-05, | |
| "loss": 0.3509, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19943974912166595, | |
| "step": 1665, | |
| "valid_targets_mean": 3634.1, | |
| "valid_targets_min": 895 | |
| }, | |
| { | |
| "epoch": 2.672, | |
| "grad_norm": 0.5440227120286691, | |
| "learning_rate": 2.111662840511373e-05, | |
| "loss": 0.3516, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1294553130865097, | |
| "step": 1670, | |
| "valid_targets_mean": 2321.6, | |
| "valid_targets_min": 661 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "grad_norm": 0.49077600939023164, | |
| "learning_rate": 2.1005064891275638e-05, | |
| "loss": 0.3567, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14773976802825928, | |
| "step": 1675, | |
| "valid_targets_mean": 4320.6, | |
| "valid_targets_min": 886 | |
| }, | |
| { | |
| "epoch": 2.6879999999999997, | |
| "grad_norm": 0.391281755034438, | |
| "learning_rate": 2.0893470015557126e-05, | |
| "loss": 0.3396, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10046662390232086, | |
| "step": 1680, | |
| "valid_targets_mean": 5169.5, | |
| "valid_targets_min": 982 | |
| }, | |
| { | |
| "epoch": 2.6959999999999997, | |
| "grad_norm": 0.45801761659477147, | |
| "learning_rate": 2.078184726014643e-05, | |
| "loss": 0.3712, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1757431924343109, | |
| "step": 1685, | |
| "valid_targets_mean": 4211.1, | |
| "valid_targets_min": 1628 | |
| }, | |
| { | |
| "epoch": 2.7039999999999997, | |
| "grad_norm": 0.4549840031695572, | |
| "learning_rate": 2.0670200108101754e-05, | |
| "loss": 0.3328, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1375683844089508, | |
| "step": 1690, | |
| "valid_targets_mean": 3498.6, | |
| "valid_targets_min": 1023 | |
| }, | |
| { | |
| "epoch": 2.7119999999999997, | |
| "grad_norm": 0.4915990617682205, | |
| "learning_rate": 2.0558532043242557e-05, | |
| "loss": 0.3437, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0870099812746048, | |
| "step": 1695, | |
| "valid_targets_mean": 3573.2, | |
| "valid_targets_min": 574 | |
| }, | |
| { | |
| "epoch": 2.7199999999999998, | |
| "grad_norm": 0.5924797281596321, | |
| "learning_rate": 2.0446846550040863e-05, | |
| "loss": 0.3685, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22033245861530304, | |
| "step": 1700, | |
| "valid_targets_mean": 2877.0, | |
| "valid_targets_min": 854 | |
| }, | |
| { | |
| "epoch": 2.7279999999999998, | |
| "grad_norm": 0.43877107974753327, | |
| "learning_rate": 2.033514711351253e-05, | |
| "loss": 0.3527, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15773963928222656, | |
| "step": 1705, | |
| "valid_targets_mean": 4485.2, | |
| "valid_targets_min": 941 | |
| }, | |
| { | |
| "epoch": 2.7359999999999998, | |
| "grad_norm": 0.530664944625799, | |
| "learning_rate": 2.022343721910851e-05, | |
| "loss": 0.3421, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2027740180492401, | |
| "step": 1710, | |
| "valid_targets_mean": 4494.0, | |
| "valid_targets_min": 1226 | |
| }, | |
| { | |
| "epoch": 2.7439999999999998, | |
| "grad_norm": 0.6038027202052175, | |
| "learning_rate": 2.0111720352606054e-05, | |
| "loss": 0.3601, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19138199090957642, | |
| "step": 1715, | |
| "valid_targets_mean": 3350.9, | |
| "valid_targets_min": 1106 | |
| }, | |
| { | |
| "epoch": 2.752, | |
| "grad_norm": 0.48781174342181044, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3113, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19703620672225952, | |
| "step": 1720, | |
| "valid_targets_mean": 5906.1, | |
| "valid_targets_min": 1657 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "grad_norm": 0.4902966583040599, | |
| "learning_rate": 1.988827964739395e-05, | |
| "loss": 0.3293, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12021969258785248, | |
| "step": 1725, | |
| "valid_targets_mean": 2399.0, | |
| "valid_targets_min": 897 | |
| }, | |
| { | |
| "epoch": 2.768, | |
| "grad_norm": 0.47146153893588105, | |
| "learning_rate": 1.9776562780891494e-05, | |
| "loss": 0.3446, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14058835804462433, | |
| "step": 1730, | |
| "valid_targets_mean": 2738.2, | |
| "valid_targets_min": 1074 | |
| }, | |
| { | |
| "epoch": 2.776, | |
| "grad_norm": 0.5388251301985688, | |
| "learning_rate": 1.966485288648747e-05, | |
| "loss": 0.3284, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16873209178447723, | |
| "step": 1735, | |
| "valid_targets_mean": 3595.6, | |
| "valid_targets_min": 844 | |
| }, | |
| { | |
| "epoch": 2.784, | |
| "grad_norm": 0.5085083642233609, | |
| "learning_rate": 1.9553153449959144e-05, | |
| "loss": 0.3548, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17369981110095978, | |
| "step": 1740, | |
| "valid_targets_mean": 4477.5, | |
| "valid_targets_min": 905 | |
| }, | |
| { | |
| "epoch": 2.792, | |
| "grad_norm": 0.49910923546702074, | |
| "learning_rate": 1.9441467956757453e-05, | |
| "loss": 0.3631, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12397250533103943, | |
| "step": 1745, | |
| "valid_targets_mean": 2318.6, | |
| "valid_targets_min": 868 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 0.48862405925011343, | |
| "learning_rate": 1.9329799891898256e-05, | |
| "loss": 0.3353, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23336371779441833, | |
| "step": 1750, | |
| "valid_targets_mean": 4532.2, | |
| "valid_targets_min": 1037 | |
| }, | |
| { | |
| "epoch": 2.808, | |
| "grad_norm": 0.39497314431233704, | |
| "learning_rate": 1.9218152739853576e-05, | |
| "loss": 0.3471, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1946973204612732, | |
| "step": 1755, | |
| "valid_targets_mean": 5582.1, | |
| "valid_targets_min": 710 | |
| }, | |
| { | |
| "epoch": 2.816, | |
| "grad_norm": 0.5003296582395104, | |
| "learning_rate": 1.9106529984442884e-05, | |
| "loss": 0.3275, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2435501217842102, | |
| "step": 1760, | |
| "valid_targets_mean": 5183.2, | |
| "valid_targets_min": 1061 | |
| }, | |
| { | |
| "epoch": 2.824, | |
| "grad_norm": 0.52781859107085, | |
| "learning_rate": 1.8994935108724366e-05, | |
| "loss": 0.345, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24607789516448975, | |
| "step": 1765, | |
| "valid_targets_mean": 4977.9, | |
| "valid_targets_min": 668 | |
| }, | |
| { | |
| "epoch": 2.832, | |
| "grad_norm": 0.4870668187855425, | |
| "learning_rate": 1.8883371594886276e-05, | |
| "loss": 0.3378, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17322807013988495, | |
| "step": 1770, | |
| "valid_targets_mean": 4143.9, | |
| "valid_targets_min": 721 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "grad_norm": 0.5501480651217011, | |
| "learning_rate": 1.877184292413824e-05, | |
| "loss": 0.3385, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1610127091407776, | |
| "step": 1775, | |
| "valid_targets_mean": 2323.1, | |
| "valid_targets_min": 760 | |
| }, | |
| { | |
| "epoch": 2.848, | |
| "grad_norm": 0.5665295596550164, | |
| "learning_rate": 1.8660352576602663e-05, | |
| "loss": 0.344, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23653550446033478, | |
| "step": 1780, | |
| "valid_targets_mean": 4046.8, | |
| "valid_targets_min": 559 | |
| }, | |
| { | |
| "epoch": 2.856, | |
| "grad_norm": 0.47013239118348343, | |
| "learning_rate": 1.8548904031206102e-05, | |
| "loss": 0.3472, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20719118416309357, | |
| "step": 1785, | |
| "valid_targets_mean": 5973.1, | |
| "valid_targets_min": 824 | |
| }, | |
| { | |
| "epoch": 2.864, | |
| "grad_norm": 0.4773248045425617, | |
| "learning_rate": 1.843750076557072e-05, | |
| "loss": 0.3382, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18604245781898499, | |
| "step": 1790, | |
| "valid_targets_mean": 5011.6, | |
| "valid_targets_min": 1589 | |
| }, | |
| { | |
| "epoch": 2.872, | |
| "grad_norm": 0.7432332329422374, | |
| "learning_rate": 1.832614625590581e-05, | |
| "loss": 0.3387, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20564137399196625, | |
| "step": 1795, | |
| "valid_targets_mean": 2076.8, | |
| "valid_targets_min": 701 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "grad_norm": 0.5780630575919011, | |
| "learning_rate": 1.8214843976899264e-05, | |
| "loss": 0.3475, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.168882817029953, | |
| "step": 1800, | |
| "valid_targets_mean": 2690.0, | |
| "valid_targets_min": 973 | |
| }, | |
| { | |
| "epoch": 2.888, | |
| "grad_norm": 0.4048713565752578, | |
| "learning_rate": 1.810359740160919e-05, | |
| "loss": 0.3294, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11568081378936768, | |
| "step": 1805, | |
| "valid_targets_mean": 4304.0, | |
| "valid_targets_min": 1228 | |
| }, | |
| { | |
| "epoch": 2.896, | |
| "grad_norm": 0.5039084001370419, | |
| "learning_rate": 1.7992410001355515e-05, | |
| "loss": 0.3471, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1639002561569214, | |
| "step": 1810, | |
| "valid_targets_mean": 3705.6, | |
| "valid_targets_min": 678 | |
| }, | |
| { | |
| "epoch": 2.904, | |
| "grad_norm": 0.8029006281339147, | |
| "learning_rate": 1.788128524561168e-05, | |
| "loss": 0.342, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15965096652507782, | |
| "step": 1815, | |
| "valid_targets_mean": 1893.2, | |
| "valid_targets_min": 908 | |
| }, | |
| { | |
| "epoch": 2.912, | |
| "grad_norm": 0.45687805144339416, | |
| "learning_rate": 1.7770226601896397e-05, | |
| "loss": 0.3296, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1370457112789154, | |
| "step": 1820, | |
| "valid_targets_mean": 3719.6, | |
| "valid_targets_min": 1436 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "grad_norm": 0.4584149777451423, | |
| "learning_rate": 1.7659237535665404e-05, | |
| "loss": 0.3318, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18669384717941284, | |
| "step": 1825, | |
| "valid_targets_mean": 4329.4, | |
| "valid_targets_min": 792 | |
| }, | |
| { | |
| "epoch": 2.928, | |
| "grad_norm": 0.3857893662873705, | |
| "learning_rate": 1.754832151020337e-05, | |
| "loss": 0.3181, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17508453130722046, | |
| "step": 1830, | |
| "valid_targets_mean": 6344.2, | |
| "valid_targets_min": 807 | |
| }, | |
| { | |
| "epoch": 2.936, | |
| "grad_norm": 0.5909250437931737, | |
| "learning_rate": 1.74374819865158e-05, | |
| "loss": 0.318, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1460743248462677, | |
| "step": 1835, | |
| "valid_targets_mean": 2246.0, | |
| "valid_targets_min": 1028 | |
| }, | |
| { | |
| "epoch": 2.944, | |
| "grad_norm": 0.6168729718884929, | |
| "learning_rate": 1.7326722423221057e-05, | |
| "loss": 0.3609, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16383513808250427, | |
| "step": 1840, | |
| "valid_targets_mean": 2735.9, | |
| "valid_targets_min": 1176 | |
| }, | |
| { | |
| "epoch": 2.952, | |
| "grad_norm": 0.35833838993310363, | |
| "learning_rate": 1.7216046276442438e-05, | |
| "loss": 0.3415, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1925455927848816, | |
| "step": 1845, | |
| "valid_targets_mean": 8259.6, | |
| "valid_targets_min": 1127 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "grad_norm": 0.5568115528689965, | |
| "learning_rate": 1.7105456999700306e-05, | |
| "loss": 0.3607, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18549929559230804, | |
| "step": 1850, | |
| "valid_targets_mean": 3148.5, | |
| "valid_targets_min": 829 | |
| }, | |
| { | |
| "epoch": 2.968, | |
| "grad_norm": 0.44855376408233205, | |
| "learning_rate": 1.6994958043804374e-05, | |
| "loss": 0.3437, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14252299070358276, | |
| "step": 1855, | |
| "valid_targets_mean": 3142.0, | |
| "valid_targets_min": 1217 | |
| }, | |
| { | |
| "epoch": 2.976, | |
| "grad_norm": 0.4728038820881367, | |
| "learning_rate": 1.6884552856745972e-05, | |
| "loss": 0.3444, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10633358359336853, | |
| "step": 1860, | |
| "valid_targets_mean": 1938.1, | |
| "valid_targets_min": 506 | |
| }, | |
| { | |
| "epoch": 2.984, | |
| "grad_norm": 0.6465001523766998, | |
| "learning_rate": 1.6774244883590503e-05, | |
| "loss": 0.3675, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1333983838558197, | |
| "step": 1865, | |
| "valid_targets_mean": 1810.0, | |
| "valid_targets_min": 721 | |
| }, | |
| { | |
| "epoch": 2.992, | |
| "grad_norm": 0.42614834103402127, | |
| "learning_rate": 1.6664037566369905e-05, | |
| "loss": 0.3441, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14628413319587708, | |
| "step": 1870, | |
| "valid_targets_mean": 5585.8, | |
| "valid_targets_min": 1323 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.5627282905947615, | |
| "learning_rate": 1.6553934343975273e-05, | |
| "loss": 0.365, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25714242458343506, | |
| "step": 1875, | |
| "valid_targets_mean": 4970.8, | |
| "valid_targets_min": 1718 | |
| }, | |
| { | |
| "epoch": 3.008, | |
| "grad_norm": 0.5335261046503706, | |
| "learning_rate": 1.644393865204955e-05, | |
| "loss": 0.3588, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12273615598678589, | |
| "step": 1880, | |
| "valid_targets_mean": 3116.5, | |
| "valid_targets_min": 1219 | |
| }, | |
| { | |
| "epoch": 3.016, | |
| "grad_norm": 1.132349702215561, | |
| "learning_rate": 1.6334053922880304e-05, | |
| "loss": 0.3097, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12334040552377701, | |
| "step": 1885, | |
| "valid_targets_mean": 5509.8, | |
| "valid_targets_min": 1485 | |
| }, | |
| { | |
| "epoch": 3.024, | |
| "grad_norm": 0.44315501434081866, | |
| "learning_rate": 1.622428358529265e-05, | |
| "loss": 0.3306, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1480335295200348, | |
| "step": 1890, | |
| "valid_targets_mean": 3953.9, | |
| "valid_targets_min": 596 | |
| }, | |
| { | |
| "epoch": 3.032, | |
| "grad_norm": 0.588510096981289, | |
| "learning_rate": 1.611463106454224e-05, | |
| "loss": 0.3392, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23434148728847504, | |
| "step": 1895, | |
| "valid_targets_mean": 4087.4, | |
| "valid_targets_min": 1260 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "grad_norm": 0.42621894302985963, | |
| "learning_rate": 1.6005099782208392e-05, | |
| "loss": 0.3477, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18396449089050293, | |
| "step": 1900, | |
| "valid_targets_mean": 5456.5, | |
| "valid_targets_min": 2731 | |
| }, | |
| { | |
| "epoch": 3.048, | |
| "grad_norm": 0.41823653199244526, | |
| "learning_rate": 1.5895693156087317e-05, | |
| "loss": 0.3079, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19087563455104828, | |
| "step": 1905, | |
| "valid_targets_mean": 6455.0, | |
| "valid_targets_min": 1013 | |
| }, | |
| { | |
| "epoch": 3.056, | |
| "grad_norm": 0.4782619422592544, | |
| "learning_rate": 1.578641460008548e-05, | |
| "loss": 0.3172, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1430778205394745, | |
| "step": 1910, | |
| "valid_targets_mean": 3750.5, | |
| "valid_targets_min": 725 | |
| }, | |
| { | |
| "epoch": 3.064, | |
| "grad_norm": 0.4189615311261732, | |
| "learning_rate": 1.5677267524113054e-05, | |
| "loss": 0.3169, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15594616532325745, | |
| "step": 1915, | |
| "valid_targets_mean": 6085.0, | |
| "valid_targets_min": 1033 | |
| }, | |
| { | |
| "epoch": 3.072, | |
| "grad_norm": 0.4552341804156781, | |
| "learning_rate": 1.5568255333977547e-05, | |
| "loss": 0.2908, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1645529717206955, | |
| "step": 1920, | |
| "valid_targets_mean": 4930.0, | |
| "valid_targets_min": 1100 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "grad_norm": 0.4123722916613195, | |
| "learning_rate": 1.5459381431277506e-05, | |
| "loss": 0.326, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10328075289726257, | |
| "step": 1925, | |
| "valid_targets_mean": 4879.5, | |
| "valid_targets_min": 886 | |
| }, | |
| { | |
| "epoch": 3.088, | |
| "grad_norm": 0.49773690652553637, | |
| "learning_rate": 1.5350649213296373e-05, | |
| "loss": 0.3312, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1240905225276947, | |
| "step": 1930, | |
| "valid_targets_mean": 2668.1, | |
| "valid_targets_min": 936 | |
| }, | |
| { | |
| "epoch": 3.096, | |
| "grad_norm": 0.45278449933874565, | |
| "learning_rate": 1.5242062072896483e-05, | |
| "loss": 0.3371, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13505850732326508, | |
| "step": 1935, | |
| "valid_targets_mean": 3849.6, | |
| "valid_targets_min": 845 | |
| }, | |
| { | |
| "epoch": 3.104, | |
| "grad_norm": 0.3913986266762585, | |
| "learning_rate": 1.5133623398413209e-05, | |
| "loss": 0.3189, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1345825046300888, | |
| "step": 1940, | |
| "valid_targets_mean": 4995.1, | |
| "valid_targets_min": 1052 | |
| }, | |
| { | |
| "epoch": 3.112, | |
| "grad_norm": 0.6573102578659725, | |
| "learning_rate": 1.50253365735492e-05, | |
| "loss": 0.3329, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13540002703666687, | |
| "step": 1945, | |
| "valid_targets_mean": 1855.5, | |
| "valid_targets_min": 617 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "grad_norm": 0.5353571868182, | |
| "learning_rate": 1.4917204977268833e-05, | |
| "loss": 0.3487, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10793092101812363, | |
| "step": 1950, | |
| "valid_targets_mean": 2389.6, | |
| "valid_targets_min": 499 | |
| }, | |
| { | |
| "epoch": 3.128, | |
| "grad_norm": 0.45964730669624526, | |
| "learning_rate": 1.4809231983692733e-05, | |
| "loss": 0.3234, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18516826629638672, | |
| "step": 1955, | |
| "valid_targets_mean": 6327.5, | |
| "valid_targets_min": 1400 | |
| }, | |
| { | |
| "epoch": 3.136, | |
| "grad_norm": 0.5326551897217631, | |
| "learning_rate": 1.4701420961992533e-05, | |
| "loss": 0.309, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11213398724794388, | |
| "step": 1960, | |
| "valid_targets_mean": 2593.1, | |
| "valid_targets_min": 638 | |
| }, | |
| { | |
| "epoch": 3.144, | |
| "grad_norm": 0.5727183007723465, | |
| "learning_rate": 1.459377527628571e-05, | |
| "loss": 0.3177, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0960133746266365, | |
| "step": 1965, | |
| "valid_targets_mean": 1934.0, | |
| "valid_targets_min": 576 | |
| }, | |
| { | |
| "epoch": 3.152, | |
| "grad_norm": 0.5747836643926282, | |
| "learning_rate": 1.4486298285530634e-05, | |
| "loss": 0.3613, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2422075867652893, | |
| "step": 1970, | |
| "valid_targets_mean": 4018.4, | |
| "valid_targets_min": 722 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "grad_norm": 0.4200083023418681, | |
| "learning_rate": 1.4378993343421736e-05, | |
| "loss": 0.3153, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13149552047252655, | |
| "step": 1975, | |
| "valid_targets_mean": 4988.6, | |
| "valid_targets_min": 1289 | |
| }, | |
| { | |
| "epoch": 3.168, | |
| "grad_norm": 0.5127779132992235, | |
| "learning_rate": 1.4271863798284877e-05, | |
| "loss": 0.3095, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12630169093608856, | |
| "step": 1980, | |
| "valid_targets_mean": 2393.5, | |
| "valid_targets_min": 690 | |
| }, | |
| { | |
| "epoch": 3.176, | |
| "grad_norm": 0.5792388346676679, | |
| "learning_rate": 1.4164912992972846e-05, | |
| "loss": 0.3446, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16788384318351746, | |
| "step": 1985, | |
| "valid_targets_mean": 3161.4, | |
| "valid_targets_min": 829 | |
| }, | |
| { | |
| "epoch": 3.184, | |
| "grad_norm": 0.5717202482455881, | |
| "learning_rate": 1.4058144264761087e-05, | |
| "loss": 0.2997, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19649925827980042, | |
| "step": 1990, | |
| "valid_targets_mean": 4664.4, | |
| "valid_targets_min": 955 | |
| }, | |
| { | |
| "epoch": 3.192, | |
| "grad_norm": 0.6151627913778978, | |
| "learning_rate": 1.3951560945243517e-05, | |
| "loss": 0.3316, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16047929227352142, | |
| "step": 1995, | |
| "valid_targets_mean": 2772.8, | |
| "valid_targets_min": 793 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "grad_norm": 0.4242933168481296, | |
| "learning_rate": 1.3845166360228597e-05, | |
| "loss": 0.3652, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19517377018928528, | |
| "step": 2000, | |
| "valid_targets_mean": 6388.5, | |
| "valid_targets_min": 495 | |
| }, | |
| { | |
| "epoch": 3.208, | |
| "grad_norm": 0.5003469604574386, | |
| "learning_rate": 1.3738963829635559e-05, | |
| "loss": 0.3156, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16463890671730042, | |
| "step": 2005, | |
| "valid_targets_mean": 4910.6, | |
| "valid_targets_min": 833 | |
| }, | |
| { | |
| "epoch": 3.216, | |
| "grad_norm": 0.9508204216990079, | |
| "learning_rate": 1.3632956667390784e-05, | |
| "loss": 0.2938, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14386102557182312, | |
| "step": 2010, | |
| "valid_targets_mean": 3383.9, | |
| "valid_targets_min": 1221 | |
| }, | |
| { | |
| "epoch": 3.224, | |
| "grad_norm": 0.47315611427798243, | |
| "learning_rate": 1.3527148181324425e-05, | |
| "loss": 0.3215, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1463852822780609, | |
| "step": 2015, | |
| "valid_targets_mean": 4139.1, | |
| "valid_targets_min": 881 | |
| }, | |
| { | |
| "epoch": 3.232, | |
| "grad_norm": 0.4910524255424862, | |
| "learning_rate": 1.3421541673067168e-05, | |
| "loss": 0.3058, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19467991590499878, | |
| "step": 2020, | |
| "valid_targets_mean": 5570.2, | |
| "valid_targets_min": 1334 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "grad_norm": 0.42714613992182937, | |
| "learning_rate": 1.3316140437947207e-05, | |
| "loss": 0.2985, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12120620906352997, | |
| "step": 2025, | |
| "valid_targets_mean": 4442.6, | |
| "valid_targets_min": 704 | |
| }, | |
| { | |
| "epoch": 3.248, | |
| "grad_norm": 0.5255009932566614, | |
| "learning_rate": 1.321094776488745e-05, | |
| "loss": 0.3184, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1309448778629303, | |
| "step": 2030, | |
| "valid_targets_mean": 3026.9, | |
| "valid_targets_min": 999 | |
| }, | |
| { | |
| "epoch": 3.2560000000000002, | |
| "grad_norm": 0.530757098061631, | |
| "learning_rate": 1.3105966936302856e-05, | |
| "loss": 0.3272, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1335388720035553, | |
| "step": 2035, | |
| "valid_targets_mean": 3075.5, | |
| "valid_targets_min": 1200 | |
| }, | |
| { | |
| "epoch": 3.2640000000000002, | |
| "grad_norm": 0.4322808474453205, | |
| "learning_rate": 1.3001201227998023e-05, | |
| "loss": 0.3245, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17669960856437683, | |
| "step": 2040, | |
| "valid_targets_mean": 8023.5, | |
| "valid_targets_min": 574 | |
| }, | |
| { | |
| "epoch": 3.2720000000000002, | |
| "grad_norm": 0.45545029411121973, | |
| "learning_rate": 1.2896653909064964e-05, | |
| "loss": 0.3091, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12387904524803162, | |
| "step": 2045, | |
| "valid_targets_mean": 4796.9, | |
| "valid_targets_min": 944 | |
| }, | |
| { | |
| "epoch": 3.2800000000000002, | |
| "grad_norm": 0.4647985673919204, | |
| "learning_rate": 1.2792328241781124e-05, | |
| "loss": 0.3246, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13686639070510864, | |
| "step": 2050, | |
| "valid_targets_mean": 4213.1, | |
| "valid_targets_min": 430 | |
| }, | |
| { | |
| "epoch": 3.288, | |
| "grad_norm": 0.638531208310754, | |
| "learning_rate": 1.2688227481507546e-05, | |
| "loss": 0.3168, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12405982613563538, | |
| "step": 2055, | |
| "valid_targets_mean": 3198.2, | |
| "valid_targets_min": 834 | |
| }, | |
| { | |
| "epoch": 3.296, | |
| "grad_norm": 0.3881187620470557, | |
| "learning_rate": 1.258435487658733e-05, | |
| "loss": 0.3283, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14476531744003296, | |
| "step": 2060, | |
| "valid_targets_mean": 4399.9, | |
| "valid_targets_min": 971 | |
| }, | |
| { | |
| "epoch": 3.304, | |
| "grad_norm": 0.4425558771523959, | |
| "learning_rate": 1.2480713668244243e-05, | |
| "loss": 0.3007, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14966867864131927, | |
| "step": 2065, | |
| "valid_targets_mean": 5938.1, | |
| "valid_targets_min": 1014 | |
| }, | |
| { | |
| "epoch": 3.312, | |
| "grad_norm": 0.47898598677849, | |
| "learning_rate": 1.2377307090481586e-05, | |
| "loss": 0.3288, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13591575622558594, | |
| "step": 2070, | |
| "valid_targets_mean": 4385.5, | |
| "valid_targets_min": 721 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "grad_norm": 0.6460674905120861, | |
| "learning_rate": 1.2274138369981298e-05, | |
| "loss": 0.3089, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23174312710762024, | |
| "step": 2075, | |
| "valid_targets_mean": 3097.0, | |
| "valid_targets_min": 630 | |
| }, | |
| { | |
| "epoch": 3.328, | |
| "grad_norm": 0.42734464278344975, | |
| "learning_rate": 1.2171210726003256e-05, | |
| "loss": 0.3276, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16050082445144653, | |
| "step": 2080, | |
| "valid_targets_mean": 5419.9, | |
| "valid_targets_min": 867 | |
| }, | |
| { | |
| "epoch": 3.336, | |
| "grad_norm": 0.47895472971752917, | |
| "learning_rate": 1.2068527370284815e-05, | |
| "loss": 0.3176, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16464433073997498, | |
| "step": 2085, | |
| "valid_targets_mean": 3824.9, | |
| "valid_targets_min": 1169 | |
| }, | |
| { | |
| "epoch": 3.344, | |
| "grad_norm": 0.4240551447146959, | |
| "learning_rate": 1.1966091506940616e-05, | |
| "loss": 0.3274, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11462630331516266, | |
| "step": 2090, | |
| "valid_targets_mean": 4184.4, | |
| "valid_targets_min": 832 | |
| }, | |
| { | |
| "epoch": 3.352, | |
| "grad_norm": 0.7394655211812231, | |
| "learning_rate": 1.1863906332362569e-05, | |
| "loss": 0.3628, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1140076294541359, | |
| "step": 2095, | |
| "valid_targets_mean": 2578.2, | |
| "valid_targets_min": 786 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "grad_norm": 0.41922280281116975, | |
| "learning_rate": 1.176197503512015e-05, | |
| "loss": 0.3148, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.159539595246315, | |
| "step": 2100, | |
| "valid_targets_mean": 5751.0, | |
| "valid_targets_min": 808 | |
| }, | |
| { | |
| "epoch": 3.368, | |
| "grad_norm": 0.5584401926293026, | |
| "learning_rate": 1.1660300795860877e-05, | |
| "loss": 0.3253, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1747303605079651, | |
| "step": 2105, | |
| "valid_targets_mean": 2999.6, | |
| "valid_targets_min": 834 | |
| }, | |
| { | |
| "epoch": 3.376, | |
| "grad_norm": 0.4166954589757251, | |
| "learning_rate": 1.1558886787211071e-05, | |
| "loss": 0.2865, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1294165849685669, | |
| "step": 2110, | |
| "valid_targets_mean": 4363.6, | |
| "valid_targets_min": 546 | |
| }, | |
| { | |
| "epoch": 3.384, | |
| "grad_norm": 0.5662294561036999, | |
| "learning_rate": 1.1457736173676883e-05, | |
| "loss": 0.3209, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12514197826385498, | |
| "step": 2115, | |
| "valid_targets_mean": 2388.6, | |
| "valid_targets_min": 974 | |
| }, | |
| { | |
| "epoch": 3.392, | |
| "grad_norm": 0.5059434791101876, | |
| "learning_rate": 1.1356852111545493e-05, | |
| "loss": 0.3481, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16026899218559265, | |
| "step": 2120, | |
| "valid_targets_mean": 3593.6, | |
| "valid_targets_min": 1325 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "grad_norm": 0.5844883650373187, | |
| "learning_rate": 1.1256237748786675e-05, | |
| "loss": 0.3326, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22520673274993896, | |
| "step": 2125, | |
| "valid_targets_mean": 3685.5, | |
| "valid_targets_min": 379 | |
| }, | |
| { | |
| "epoch": 3.408, | |
| "grad_norm": 0.5009787440262468, | |
| "learning_rate": 1.1155896224954543e-05, | |
| "loss": 0.3357, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12661316990852356, | |
| "step": 2130, | |
| "valid_targets_mean": 3547.5, | |
| "valid_targets_min": 816 | |
| }, | |
| { | |
| "epoch": 3.416, | |
| "grad_norm": 0.48678971220314976, | |
| "learning_rate": 1.1055830671089578e-05, | |
| "loss": 0.3183, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16513441503047943, | |
| "step": 2135, | |
| "valid_targets_mean": 5887.6, | |
| "valid_targets_min": 814 | |
| }, | |
| { | |
| "epoch": 3.424, | |
| "grad_norm": 0.5206971097737679, | |
| "learning_rate": 1.0956044209620966e-05, | |
| "loss": 0.3357, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13408246636390686, | |
| "step": 2140, | |
| "valid_targets_mean": 3378.6, | |
| "valid_targets_min": 598 | |
| }, | |
| { | |
| "epoch": 3.432, | |
| "grad_norm": 0.47966318102705424, | |
| "learning_rate": 1.0856539954269121e-05, | |
| "loss": 0.323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16585497558116913, | |
| "step": 2145, | |
| "valid_targets_mean": 4378.1, | |
| "valid_targets_min": 798 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "grad_norm": 0.5122761624297089, | |
| "learning_rate": 1.0757321009948543e-05, | |
| "loss": 0.3438, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12634073197841644, | |
| "step": 2150, | |
| "valid_targets_mean": 3119.5, | |
| "valid_targets_min": 975 | |
| }, | |
| { | |
| "epoch": 3.448, | |
| "grad_norm": 0.6182750943275146, | |
| "learning_rate": 1.0658390472670938e-05, | |
| "loss": 0.327, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1473451852798462, | |
| "step": 2155, | |
| "valid_targets_mean": 2101.5, | |
| "valid_targets_min": 843 | |
| }, | |
| { | |
| "epoch": 3.456, | |
| "grad_norm": 0.4888023729480413, | |
| "learning_rate": 1.0559751429448597e-05, | |
| "loss": 0.3408, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21524950861930847, | |
| "step": 2160, | |
| "valid_targets_mean": 6050.9, | |
| "valid_targets_min": 634 | |
| }, | |
| { | |
| "epoch": 3.464, | |
| "grad_norm": 0.4542480312052559, | |
| "learning_rate": 1.0461406958198101e-05, | |
| "loss": 0.321, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12608930468559265, | |
| "step": 2165, | |
| "valid_targets_mean": 4816.9, | |
| "valid_targets_min": 879 | |
| }, | |
| { | |
| "epoch": 3.472, | |
| "grad_norm": 0.5112301560829176, | |
| "learning_rate": 1.0363360127644235e-05, | |
| "loss": 0.3444, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21885031461715698, | |
| "step": 2170, | |
| "valid_targets_mean": 6240.8, | |
| "valid_targets_min": 1414 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "grad_norm": 0.5442050920516187, | |
| "learning_rate": 1.0265613997224255e-05, | |
| "loss": 0.3139, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1261734962463379, | |
| "step": 2175, | |
| "valid_targets_mean": 3223.0, | |
| "valid_targets_min": 902 | |
| }, | |
| { | |
| "epoch": 3.488, | |
| "grad_norm": 0.4610573972043477, | |
| "learning_rate": 1.0168171616992422e-05, | |
| "loss": 0.3175, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19308221340179443, | |
| "step": 2180, | |
| "valid_targets_mean": 6308.8, | |
| "valid_targets_min": 823 | |
| }, | |
| { | |
| "epoch": 3.496, | |
| "grad_norm": 0.5306677534892053, | |
| "learning_rate": 1.007103602752483e-05, | |
| "loss": 0.3402, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1896430402994156, | |
| "step": 2185, | |
| "valid_targets_mean": 3996.8, | |
| "valid_targets_min": 671 | |
| }, | |
| { | |
| "epoch": 3.504, | |
| "grad_norm": 0.5044165875993919, | |
| "learning_rate": 9.974210259824505e-06, | |
| "loss": 0.3336, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16915544867515564, | |
| "step": 2190, | |
| "valid_targets_mean": 3629.6, | |
| "valid_targets_min": 1033 | |
| }, | |
| { | |
| "epoch": 3.512, | |
| "grad_norm": 0.6879187845686496, | |
| "learning_rate": 9.877697335226872e-06, | |
| "loss": 0.3439, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10717498511075974, | |
| "step": 2195, | |
| "valid_targets_mean": 2186.5, | |
| "valid_targets_min": 687 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "grad_norm": 0.6250839989818192, | |
| "learning_rate": 9.781500265305448e-06, | |
| "loss": 0.3357, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1784055531024933, | |
| "step": 2200, | |
| "valid_targets_mean": 3010.9, | |
| "valid_targets_min": 720 | |
| }, | |
| { | |
| "epoch": 3.528, | |
| "grad_norm": 0.5248629910937874, | |
| "learning_rate": 9.685622051777856e-06, | |
| "loss": 0.3092, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23266291618347168, | |
| "step": 2205, | |
| "valid_targets_mean": 5086.4, | |
| "valid_targets_min": 787 | |
| }, | |
| { | |
| "epoch": 3.536, | |
| "grad_norm": 0.4834179595767226, | |
| "learning_rate": 9.590065686412182e-06, | |
| "loss": 0.3233, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13310745358467102, | |
| "step": 2210, | |
| "valid_targets_mean": 2614.6, | |
| "valid_targets_min": 925 | |
| }, | |
| { | |
| "epoch": 3.544, | |
| "grad_norm": 0.45602768383743014, | |
| "learning_rate": 9.494834150933616e-06, | |
| "loss": 0.3297, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1372455358505249, | |
| "step": 2215, | |
| "valid_targets_mean": 3843.4, | |
| "valid_targets_min": 1099 | |
| }, | |
| { | |
| "epoch": 3.552, | |
| "grad_norm": 0.4887447107244825, | |
| "learning_rate": 9.399930416931404e-06, | |
| "loss": 0.314, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10885452479124069, | |
| "step": 2220, | |
| "valid_targets_mean": 3725.2, | |
| "valid_targets_min": 872 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "grad_norm": 0.5151465841953303, | |
| "learning_rate": 9.30535744576615e-06, | |
| "loss": 0.3353, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13782528042793274, | |
| "step": 2225, | |
| "valid_targets_mean": 3750.6, | |
| "valid_targets_min": 1124 | |
| }, | |
| { | |
| "epoch": 3.568, | |
| "grad_norm": 0.5623056848959576, | |
| "learning_rate": 9.211118188477362e-06, | |
| "loss": 0.3311, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2320508360862732, | |
| "step": 2230, | |
| "valid_targets_mean": 4508.8, | |
| "valid_targets_min": 662 | |
| }, | |
| { | |
| "epoch": 3.576, | |
| "grad_norm": 0.5178467063324498, | |
| "learning_rate": 9.117215585691408e-06, | |
| "loss": 0.3228, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17334342002868652, | |
| "step": 2235, | |
| "valid_targets_mean": 4636.2, | |
| "valid_targets_min": 916 | |
| }, | |
| { | |
| "epoch": 3.584, | |
| "grad_norm": 0.6120278120276946, | |
| "learning_rate": 9.023652567529744e-06, | |
| "loss": 0.3263, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11503614485263824, | |
| "step": 2240, | |
| "valid_targets_mean": 2788.6, | |
| "valid_targets_min": 588 | |
| }, | |
| { | |
| "epoch": 3.592, | |
| "grad_norm": 0.4937083479890421, | |
| "learning_rate": 8.930432053517465e-06, | |
| "loss": 0.3191, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11475160717964172, | |
| "step": 2245, | |
| "valid_targets_mean": 2826.0, | |
| "valid_targets_min": 957 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "grad_norm": 0.49253074619254694, | |
| "learning_rate": 8.837556952492264e-06, | |
| "loss": 0.3207, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14227665960788727, | |
| "step": 2250, | |
| "valid_targets_mean": 4518.2, | |
| "valid_targets_min": 925 | |
| }, | |
| { | |
| "epoch": 3.608, | |
| "grad_norm": 0.7363097537758285, | |
| "learning_rate": 8.745030162513582e-06, | |
| "loss": 0.3292, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21330423653125763, | |
| "step": 2255, | |
| "valid_targets_mean": 3404.9, | |
| "valid_targets_min": 874 | |
| }, | |
| { | |
| "epoch": 3.616, | |
| "grad_norm": 0.4414100721223384, | |
| "learning_rate": 8.652854570772236e-06, | |
| "loss": 0.3232, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12081056833267212, | |
| "step": 2260, | |
| "valid_targets_mean": 4392.4, | |
| "valid_targets_min": 984 | |
| }, | |
| { | |
| "epoch": 3.624, | |
| "grad_norm": 0.5606075707240097, | |
| "learning_rate": 8.561033053500312e-06, | |
| "loss": 0.3213, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08863774687051773, | |
| "step": 2265, | |
| "valid_targets_mean": 1930.2, | |
| "valid_targets_min": 685 | |
| }, | |
| { | |
| "epoch": 3.632, | |
| "grad_norm": 0.47128304721158626, | |
| "learning_rate": 8.46956847588141e-06, | |
| "loss": 0.3523, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12015935778617859, | |
| "step": 2270, | |
| "valid_targets_mean": 3377.5, | |
| "valid_targets_min": 815 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "grad_norm": 0.5125340858044971, | |
| "learning_rate": 8.378463691961237e-06, | |
| "loss": 0.2898, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14381183683872223, | |
| "step": 2275, | |
| "valid_targets_mean": 3659.1, | |
| "valid_targets_min": 491 | |
| }, | |
| { | |
| "epoch": 3.648, | |
| "grad_norm": 0.4592889282450072, | |
| "learning_rate": 8.287721544558574e-06, | |
| "loss": 0.3185, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10181337594985962, | |
| "step": 2280, | |
| "valid_targets_mean": 3632.9, | |
| "valid_targets_min": 728 | |
| }, | |
| { | |
| "epoch": 3.656, | |
| "grad_norm": 0.5965684017173326, | |
| "learning_rate": 8.197344865176548e-06, | |
| "loss": 0.3197, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12914949655532837, | |
| "step": 2285, | |
| "valid_targets_mean": 3181.9, | |
| "valid_targets_min": 1078 | |
| }, | |
| { | |
| "epoch": 3.664, | |
| "grad_norm": 0.6228428043708024, | |
| "learning_rate": 8.10733647391427e-06, | |
| "loss": 0.355, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1971237063407898, | |
| "step": 2290, | |
| "valid_targets_mean": 3335.1, | |
| "valid_targets_min": 791 | |
| }, | |
| { | |
| "epoch": 3.672, | |
| "grad_norm": 0.5979530685752266, | |
| "learning_rate": 8.017699179378849e-06, | |
| "loss": 0.3396, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22264941036701202, | |
| "step": 2295, | |
| "valid_targets_mean": 3038.4, | |
| "valid_targets_min": 830 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "grad_norm": 0.4458519642564752, | |
| "learning_rate": 7.928435778597763e-06, | |
| "loss": 0.3209, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1323288083076477, | |
| "step": 2300, | |
| "valid_targets_mean": 3766.8, | |
| "valid_targets_min": 874 | |
| }, | |
| { | |
| "epoch": 3.6879999999999997, | |
| "grad_norm": 0.5205119846178907, | |
| "learning_rate": 7.839549056931557e-06, | |
| "loss": 0.3348, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16188280284404755, | |
| "step": 2305, | |
| "valid_targets_mean": 3806.9, | |
| "valid_targets_min": 1001 | |
| }, | |
| { | |
| "epoch": 3.6959999999999997, | |
| "grad_norm": 0.6321612059563577, | |
| "learning_rate": 7.751041787986965e-06, | |
| "loss": 0.3147, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20028778910636902, | |
| "step": 2310, | |
| "valid_targets_mean": 5492.1, | |
| "valid_targets_min": 905 | |
| }, | |
| { | |
| "epoch": 3.7039999999999997, | |
| "grad_norm": 0.4856071600672378, | |
| "learning_rate": 7.662916733530317e-06, | |
| "loss": 0.2987, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20045016705989838, | |
| "step": 2315, | |
| "valid_targets_mean": 5160.9, | |
| "valid_targets_min": 1459 | |
| }, | |
| { | |
| "epoch": 3.7119999999999997, | |
| "grad_norm": 0.6190037743730318, | |
| "learning_rate": 7.575176643401394e-06, | |
| "loss": 0.3144, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19084815680980682, | |
| "step": 2320, | |
| "valid_targets_mean": 3074.1, | |
| "valid_targets_min": 740 | |
| }, | |
| { | |
| "epoch": 3.7199999999999998, | |
| "grad_norm": 0.47802774359013706, | |
| "learning_rate": 7.487824255427616e-06, | |
| "loss": 0.3462, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2085719108581543, | |
| "step": 2325, | |
| "valid_targets_mean": 5742.2, | |
| "valid_targets_min": 1804 | |
| }, | |
| { | |
| "epoch": 3.7279999999999998, | |
| "grad_norm": 0.6141982017666223, | |
| "learning_rate": 7.400862295338595e-06, | |
| "loss": 0.319, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1338297575712204, | |
| "step": 2330, | |
| "valid_targets_mean": 3026.8, | |
| "valid_targets_min": 869 | |
| }, | |
| { | |
| "epoch": 3.7359999999999998, | |
| "grad_norm": 0.4272699620447183, | |
| "learning_rate": 7.314293476681122e-06, | |
| "loss": 0.3269, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12799906730651855, | |
| "step": 2335, | |
| "valid_targets_mean": 5400.1, | |
| "valid_targets_min": 920 | |
| }, | |
| { | |
| "epoch": 3.7439999999999998, | |
| "grad_norm": 0.558531390187902, | |
| "learning_rate": 7.228120500734443e-06, | |
| "loss": 0.3094, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1416444331407547, | |
| "step": 2340, | |
| "valid_targets_mean": 3904.5, | |
| "valid_targets_min": 665 | |
| }, | |
| { | |
| "epoch": 3.752, | |
| "grad_norm": 0.5957360903717863, | |
| "learning_rate": 7.1423460564259995e-06, | |
| "loss": 0.3211, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1542271375656128, | |
| "step": 2345, | |
| "valid_targets_mean": 2498.1, | |
| "valid_targets_min": 725 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "grad_norm": 0.5749982706805087, | |
| "learning_rate": 7.056972820247516e-06, | |
| "loss": 0.3403, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24424612522125244, | |
| "step": 2350, | |
| "valid_targets_mean": 4599.0, | |
| "valid_targets_min": 586 | |
| }, | |
| { | |
| "epoch": 3.768, | |
| "grad_norm": 0.7004621373727494, | |
| "learning_rate": 6.97200345617149e-06, | |
| "loss": 0.3132, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.261954128742218, | |
| "step": 2355, | |
| "valid_targets_mean": 3538.9, | |
| "valid_targets_min": 911 | |
| }, | |
| { | |
| "epoch": 3.776, | |
| "grad_norm": 0.4631600705700879, | |
| "learning_rate": 6.887440615568044e-06, | |
| "loss": 0.3387, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13582396507263184, | |
| "step": 2360, | |
| "valid_targets_mean": 4712.9, | |
| "valid_targets_min": 1222 | |
| }, | |
| { | |
| "epoch": 3.784, | |
| "grad_norm": 0.5495931240245518, | |
| "learning_rate": 6.803286937122233e-06, | |
| "loss": 0.3371, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23861850798130035, | |
| "step": 2365, | |
| "valid_targets_mean": 4753.6, | |
| "valid_targets_min": 980 | |
| }, | |
| { | |
| "epoch": 3.792, | |
| "grad_norm": 0.6024280451059767, | |
| "learning_rate": 6.719545046751674e-06, | |
| "loss": 0.3315, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21030789613723755, | |
| "step": 2370, | |
| "valid_targets_mean": 2878.9, | |
| "valid_targets_min": 895 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "grad_norm": 0.6147492284481915, | |
| "learning_rate": 6.636217557524605e-06, | |
| "loss": 0.3522, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.161615788936615, | |
| "step": 2375, | |
| "valid_targets_mean": 2665.0, | |
| "valid_targets_min": 634 | |
| }, | |
| { | |
| "epoch": 3.808, | |
| "grad_norm": 0.5623305812954921, | |
| "learning_rate": 6.55330706957837e-06, | |
| "loss": 0.3327, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2057649940252304, | |
| "step": 2380, | |
| "valid_targets_mean": 4465.9, | |
| "valid_targets_min": 1127 | |
| }, | |
| { | |
| "epoch": 3.816, | |
| "grad_norm": 0.5283338376179509, | |
| "learning_rate": 6.4708161700382655e-06, | |
| "loss": 0.3246, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1588209867477417, | |
| "step": 2385, | |
| "valid_targets_mean": 3723.0, | |
| "valid_targets_min": 807 | |
| }, | |
| { | |
| "epoch": 3.824, | |
| "grad_norm": 0.4510470301147515, | |
| "learning_rate": 6.388747432936819e-06, | |
| "loss": 0.3195, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12806157767772675, | |
| "step": 2390, | |
| "valid_targets_mean": 3841.9, | |
| "valid_targets_min": 981 | |
| }, | |
| { | |
| "epoch": 3.832, | |
| "grad_norm": 0.5436971472039873, | |
| "learning_rate": 6.3071034191334915e-06, | |
| "loss": 0.3228, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1942981779575348, | |
| "step": 2395, | |
| "valid_targets_mean": 3798.4, | |
| "valid_targets_min": 732 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "grad_norm": 0.5018428572701472, | |
| "learning_rate": 6.22588667623472e-06, | |
| "loss": 0.3207, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13997101783752441, | |
| "step": 2400, | |
| "valid_targets_mean": 3812.2, | |
| "valid_targets_min": 708 | |
| }, | |
| { | |
| "epoch": 3.848, | |
| "grad_norm": 0.582055372382792, | |
| "learning_rate": 6.145099738514466e-06, | |
| "loss": 0.3218, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2213214933872223, | |
| "step": 2405, | |
| "valid_targets_mean": 3403.4, | |
| "valid_targets_min": 819 | |
| }, | |
| { | |
| "epoch": 3.856, | |
| "grad_norm": 0.583928362005403, | |
| "learning_rate": 6.064745126835112e-06, | |
| "loss": 0.3025, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16777969896793365, | |
| "step": 2410, | |
| "valid_targets_mean": 3357.1, | |
| "valid_targets_min": 860 | |
| }, | |
| { | |
| "epoch": 3.864, | |
| "grad_norm": 0.6142754993162113, | |
| "learning_rate": 5.984825348568812e-06, | |
| "loss": 0.3007, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19814524054527283, | |
| "step": 2415, | |
| "valid_targets_mean": 2603.5, | |
| "valid_targets_min": 831 | |
| }, | |
| { | |
| "epoch": 3.872, | |
| "grad_norm": 0.4195895918414985, | |
| "learning_rate": 5.905342897519262e-06, | |
| "loss": 0.33, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12075480818748474, | |
| "step": 2420, | |
| "valid_targets_mean": 4224.9, | |
| "valid_targets_min": 551 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "grad_norm": 0.6108959292753345, | |
| "learning_rate": 5.826300253843851e-06, | |
| "loss": 0.3366, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15773876011371613, | |
| "step": 2425, | |
| "valid_targets_mean": 2816.1, | |
| "valid_targets_min": 930 | |
| }, | |
| { | |
| "epoch": 3.888, | |
| "grad_norm": 0.5171359185903159, | |
| "learning_rate": 5.7476998839763035e-06, | |
| "loss": 0.2989, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13886040449142456, | |
| "step": 2430, | |
| "valid_targets_mean": 3425.9, | |
| "valid_targets_min": 789 | |
| }, | |
| { | |
| "epoch": 3.896, | |
| "grad_norm": 0.4763178741386556, | |
| "learning_rate": 5.669544240549698e-06, | |
| "loss": 0.3051, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16396956145763397, | |
| "step": 2435, | |
| "valid_targets_mean": 4035.2, | |
| "valid_targets_min": 893 | |
| }, | |
| { | |
| "epoch": 3.904, | |
| "grad_norm": 0.4785442559038061, | |
| "learning_rate": 5.591835762319946e-06, | |
| "loss": 0.3111, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11912888288497925, | |
| "step": 2440, | |
| "valid_targets_mean": 2858.9, | |
| "valid_targets_min": 566 | |
| }, | |
| { | |
| "epoch": 3.912, | |
| "grad_norm": 0.6233825072887711, | |
| "learning_rate": 5.514576874089683e-06, | |
| "loss": 0.3413, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1519429087638855, | |
| "step": 2445, | |
| "valid_targets_mean": 3033.6, | |
| "valid_targets_min": 559 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "grad_norm": 0.5322851898274968, | |
| "learning_rate": 5.437769986632622e-06, | |
| "loss": 0.341, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1855153739452362, | |
| "step": 2450, | |
| "valid_targets_mean": 4399.1, | |
| "valid_targets_min": 1451 | |
| }, | |
| { | |
| "epoch": 3.928, | |
| "grad_norm": 0.44237959812688865, | |
| "learning_rate": 5.361417496618315e-06, | |
| "loss": 0.3117, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18377932906150818, | |
| "step": 2455, | |
| "valid_targets_mean": 6480.9, | |
| "valid_targets_min": 1407 | |
| }, | |
| { | |
| "epoch": 3.936, | |
| "grad_norm": 0.7143654129319893, | |
| "learning_rate": 5.285521786537368e-06, | |
| "loss": 0.3282, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15518513321876526, | |
| "step": 2460, | |
| "valid_targets_mean": 4235.6, | |
| "valid_targets_min": 1211 | |
| }, | |
| { | |
| "epoch": 3.944, | |
| "grad_norm": 0.4769151407862085, | |
| "learning_rate": 5.2100852246270975e-06, | |
| "loss": 0.3146, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21277496218681335, | |
| "step": 2465, | |
| "valid_targets_mean": 5719.1, | |
| "valid_targets_min": 829 | |
| }, | |
| { | |
| "epoch": 3.952, | |
| "grad_norm": 0.6838406068253756, | |
| "learning_rate": 5.135110164797637e-06, | |
| "loss": 0.3305, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3378908634185791, | |
| "step": 2470, | |
| "valid_targets_mean": 3928.5, | |
| "valid_targets_min": 664 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "grad_norm": 0.6130883409393146, | |
| "learning_rate": 5.060598946558484e-06, | |
| "loss": 0.3349, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13703122735023499, | |
| "step": 2475, | |
| "valid_targets_mean": 2553.0, | |
| "valid_targets_min": 834 | |
| }, | |
| { | |
| "epoch": 3.968, | |
| "grad_norm": 0.4357023208751496, | |
| "learning_rate": 4.986553894945512e-06, | |
| "loss": 0.2976, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16759523749351501, | |
| "step": 2480, | |
| "valid_targets_mean": 4775.6, | |
| "valid_targets_min": 736 | |
| }, | |
| { | |
| "epoch": 3.976, | |
| "grad_norm": 0.638026763808748, | |
| "learning_rate": 4.912977320448391e-06, | |
| "loss": 0.3393, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2706812620162964, | |
| "step": 2485, | |
| "valid_targets_mean": 4390.4, | |
| "valid_targets_min": 1011 | |
| }, | |
| { | |
| "epoch": 3.984, | |
| "grad_norm": 0.6395434082611862, | |
| "learning_rate": 4.839871518938513e-06, | |
| "loss": 0.354, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18830779194831848, | |
| "step": 2490, | |
| "valid_targets_mean": 3367.8, | |
| "valid_targets_min": 1225 | |
| }, | |
| { | |
| "epoch": 3.992, | |
| "grad_norm": 0.6045521377506226, | |
| "learning_rate": 4.767238771597347e-06, | |
| "loss": 0.3432, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.167062908411026, | |
| "step": 2495, | |
| "valid_targets_mean": 2106.9, | |
| "valid_targets_min": 555 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.5970487451114057, | |
| "learning_rate": 4.695081344845254e-06, | |
| "loss": 0.3183, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15452514588832855, | |
| "step": 2500, | |
| "valid_targets_mean": 3056.4, | |
| "valid_targets_min": 506 | |
| }, | |
| { | |
| "epoch": 4.008, | |
| "grad_norm": 0.4416015542929956, | |
| "learning_rate": 4.623401490270778e-06, | |
| "loss": 0.3266, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1425788402557373, | |
| "step": 2505, | |
| "valid_targets_mean": 4244.4, | |
| "valid_targets_min": 1097 | |
| }, | |
| { | |
| "epoch": 4.016, | |
| "grad_norm": 0.6050783805568698, | |
| "learning_rate": 4.552201444560373e-06, | |
| "loss": 0.3079, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13076642155647278, | |
| "step": 2510, | |
| "valid_targets_mean": 3324.1, | |
| "valid_targets_min": 1050 | |
| }, | |
| { | |
| "epoch": 4.024, | |
| "grad_norm": 0.7064956382591235, | |
| "learning_rate": 4.481483429428615e-06, | |
| "loss": 0.3301, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13634774088859558, | |
| "step": 2515, | |
| "valid_targets_mean": 2014.1, | |
| "valid_targets_min": 940 | |
| }, | |
| { | |
| "epoch": 4.032, | |
| "grad_norm": 0.49373001353281315, | |
| "learning_rate": 4.4112496515488765e-06, | |
| "loss": 0.2945, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15392452478408813, | |
| "step": 2520, | |
| "valid_targets_mean": 3495.6, | |
| "valid_targets_min": 467 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "grad_norm": 0.5279529664688594, | |
| "learning_rate": 4.341502302484472e-06, | |
| "loss": 0.3109, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19533191621303558, | |
| "step": 2525, | |
| "valid_targets_mean": 4519.1, | |
| "valid_targets_min": 1176 | |
| }, | |
| { | |
| "epoch": 4.048, | |
| "grad_norm": 0.5002800878652447, | |
| "learning_rate": 4.272243558620264e-06, | |
| "loss": 0.2897, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14053988456726074, | |
| "step": 2530, | |
| "valid_targets_mean": 3120.2, | |
| "valid_targets_min": 1102 | |
| }, | |
| { | |
| "epoch": 4.056, | |
| "grad_norm": 0.6522314422022066, | |
| "learning_rate": 4.203475581094771e-06, | |
| "loss": 0.3082, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20415924489498138, | |
| "step": 2535, | |
| "valid_targets_mean": 3365.0, | |
| "valid_targets_min": 1171 | |
| }, | |
| { | |
| "epoch": 4.064, | |
| "grad_norm": 0.3871114184970602, | |
| "learning_rate": 4.135200515732716e-06, | |
| "loss": 0.3235, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13306647539138794, | |
| "step": 2540, | |
| "valid_targets_mean": 6466.8, | |
| "valid_targets_min": 1014 | |
| }, | |
| { | |
| "epoch": 4.072, | |
| "grad_norm": 0.5275110629270116, | |
| "learning_rate": 4.067420492978065e-06, | |
| "loss": 0.3177, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19687163829803467, | |
| "step": 2545, | |
| "valid_targets_mean": 4931.0, | |
| "valid_targets_min": 1092 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "grad_norm": 0.6130234035544103, | |
| "learning_rate": 4.000137627827554e-06, | |
| "loss": 0.3209, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16655772924423218, | |
| "step": 2550, | |
| "valid_targets_mean": 2530.0, | |
| "valid_targets_min": 1073 | |
| }, | |
| { | |
| "epoch": 4.088, | |
| "grad_norm": 0.38814066490279997, | |
| "learning_rate": 3.9333540197647035e-06, | |
| "loss": 0.2997, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16146323084831238, | |
| "step": 2555, | |
| "valid_targets_mean": 7966.8, | |
| "valid_targets_min": 1103 | |
| }, | |
| { | |
| "epoch": 4.096, | |
| "grad_norm": 0.4590471476508371, | |
| "learning_rate": 3.867071752694282e-06, | |
| "loss": 0.304, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1653611660003662, | |
| "step": 2560, | |
| "valid_targets_mean": 4412.4, | |
| "valid_targets_min": 602 | |
| }, | |
| { | |
| "epoch": 4.104, | |
| "grad_norm": 0.5272846889528702, | |
| "learning_rate": 3.8012928948773243e-06, | |
| "loss": 0.3173, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18114957213401794, | |
| "step": 2565, | |
| "valid_targets_mean": 4442.1, | |
| "valid_targets_min": 1003 | |
| }, | |
| { | |
| "epoch": 4.112, | |
| "grad_norm": 0.5186772440314499, | |
| "learning_rate": 3.7360194988665364e-06, | |
| "loss": 0.2818, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13564951717853546, | |
| "step": 2570, | |
| "valid_targets_mean": 3933.4, | |
| "valid_targets_min": 1004 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "grad_norm": 0.4328083139216796, | |
| "learning_rate": 3.6712536014422885e-06, | |
| "loss": 0.3033, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12977717816829681, | |
| "step": 2575, | |
| "valid_targets_mean": 5225.9, | |
| "valid_targets_min": 871 | |
| }, | |
| { | |
| "epoch": 4.128, | |
| "grad_norm": 0.6420338275118762, | |
| "learning_rate": 3.606997223549049e-06, | |
| "loss": 0.3142, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20889759063720703, | |
| "step": 2580, | |
| "valid_targets_mean": 3776.1, | |
| "valid_targets_min": 844 | |
| }, | |
| { | |
| "epoch": 4.136, | |
| "grad_norm": 0.5245741332938453, | |
| "learning_rate": 3.543252370232313e-06, | |
| "loss": 0.3056, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1626463085412979, | |
| "step": 2585, | |
| "valid_targets_mean": 4243.8, | |
| "valid_targets_min": 1076 | |
| }, | |
| { | |
| "epoch": 4.144, | |
| "grad_norm": 0.6009260362686207, | |
| "learning_rate": 3.4800210305760662e-06, | |
| "loss": 0.3222, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20430579781532288, | |
| "step": 2590, | |
| "valid_targets_mean": 4037.8, | |
| "valid_targets_min": 872 | |
| }, | |
| { | |
| "epoch": 4.152, | |
| "grad_norm": 0.6474293755424925, | |
| "learning_rate": 3.4173051776406817e-06, | |
| "loss": 0.334, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17541387677192688, | |
| "step": 2595, | |
| "valid_targets_mean": 3601.9, | |
| "valid_targets_min": 709 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "grad_norm": 0.5299682943737077, | |
| "learning_rate": 3.3551067684013706e-06, | |
| "loss": 0.3194, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17366911470890045, | |
| "step": 2600, | |
| "valid_targets_mean": 4566.0, | |
| "valid_targets_min": 797 | |
| }, | |
| { | |
| "epoch": 4.168, | |
| "grad_norm": 0.5343201098430208, | |
| "learning_rate": 3.2934277436871187e-06, | |
| "loss": 0.3082, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18967169523239136, | |
| "step": 2605, | |
| "valid_targets_mean": 5175.8, | |
| "valid_targets_min": 803 | |
| }, | |
| { | |
| "epoch": 4.176, | |
| "grad_norm": 0.706532685786007, | |
| "learning_rate": 3.232270028120121e-06, | |
| "loss": 0.3535, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22153635323047638, | |
| "step": 2610, | |
| "valid_targets_mean": 3667.5, | |
| "valid_targets_min": 1042 | |
| }, | |
| { | |
| "epoch": 4.184, | |
| "grad_norm": 0.6583861635037094, | |
| "learning_rate": 3.1716355300557256e-06, | |
| "loss": 0.3241, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21890048682689667, | |
| "step": 2615, | |
| "valid_targets_mean": 3347.5, | |
| "valid_targets_min": 993 | |
| }, | |
| { | |
| "epoch": 4.192, | |
| "grad_norm": 0.4972479392885182, | |
| "learning_rate": 3.111526141522896e-06, | |
| "loss": 0.2942, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23314082622528076, | |
| "step": 2620, | |
| "valid_targets_mean": 5941.9, | |
| "valid_targets_min": 960 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "grad_norm": 0.42587188064481973, | |
| "learning_rate": 3.0519437381651507e-06, | |
| "loss": 0.3007, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13505974411964417, | |
| "step": 2625, | |
| "valid_targets_mean": 4900.6, | |
| "valid_targets_min": 1129 | |
| }, | |
| { | |
| "epoch": 4.208, | |
| "grad_norm": 0.568223927683394, | |
| "learning_rate": 2.992890179182062e-06, | |
| "loss": 0.3146, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19689098000526428, | |
| "step": 2630, | |
| "valid_targets_mean": 4559.5, | |
| "valid_targets_min": 930 | |
| }, | |
| { | |
| "epoch": 4.216, | |
| "grad_norm": 0.3833405874743467, | |
| "learning_rate": 2.93436730727122e-06, | |
| "loss": 0.3283, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21802669763565063, | |
| "step": 2635, | |
| "valid_targets_mean": 13101.0, | |
| "valid_targets_min": 704 | |
| }, | |
| { | |
| "epoch": 4.224, | |
| "grad_norm": 0.4360867692750178, | |
| "learning_rate": 2.8763769485707447e-06, | |
| "loss": 0.3004, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1010446846485138, | |
| "step": 2640, | |
| "valid_targets_mean": 3420.9, | |
| "valid_targets_min": 694 | |
| }, | |
| { | |
| "epoch": 4.232, | |
| "grad_norm": 0.5394563271831259, | |
| "learning_rate": 2.818920912602294e-06, | |
| "loss": 0.3053, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08956138789653778, | |
| "step": 2645, | |
| "valid_targets_mean": 2421.5, | |
| "valid_targets_min": 799 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "grad_norm": 0.5364665203536897, | |
| "learning_rate": 2.762000992214626e-06, | |
| "loss": 0.3032, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13476410508155823, | |
| "step": 2650, | |
| "valid_targets_mean": 4055.1, | |
| "valid_targets_min": 751 | |
| }, | |
| { | |
| "epoch": 4.248, | |
| "grad_norm": 0.6310235572259564, | |
| "learning_rate": 2.7056189635276162e-06, | |
| "loss": 0.3064, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2394050657749176, | |
| "step": 2655, | |
| "valid_targets_mean": 4607.8, | |
| "valid_targets_min": 691 | |
| }, | |
| { | |
| "epoch": 4.256, | |
| "grad_norm": 0.5141156610492511, | |
| "learning_rate": 2.6497765858768643e-06, | |
| "loss": 0.3108, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19690141081809998, | |
| "step": 2660, | |
| "valid_targets_mean": 4163.4, | |
| "valid_targets_min": 1429 | |
| }, | |
| { | |
| "epoch": 4.264, | |
| "grad_norm": 0.60664896327067, | |
| "learning_rate": 2.594475601758786e-06, | |
| "loss": 0.3045, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21876487135887146, | |
| "step": 2665, | |
| "valid_targets_mean": 3978.0, | |
| "valid_targets_min": 1162 | |
| }, | |
| { | |
| "epoch": 4.272, | |
| "grad_norm": 0.586279917157946, | |
| "learning_rate": 2.539717736776237e-06, | |
| "loss": 0.3262, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15984392166137695, | |
| "step": 2670, | |
| "valid_targets_mean": 3679.0, | |
| "valid_targets_min": 819 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "grad_norm": 0.5679469104180861, | |
| "learning_rate": 2.4855046995846844e-06, | |
| "loss": 0.2905, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1231120154261589, | |
| "step": 2675, | |
| "valid_targets_mean": 2242.6, | |
| "valid_targets_min": 596 | |
| }, | |
| { | |
| "epoch": 4.288, | |
| "grad_norm": 0.5442418621257851, | |
| "learning_rate": 2.431838181838868e-06, | |
| "loss": 0.3294, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15020456910133362, | |
| "step": 2680, | |
| "valid_targets_mean": 4034.4, | |
| "valid_targets_min": 746 | |
| }, | |
| { | |
| "epoch": 4.296, | |
| "grad_norm": 0.6563588104632252, | |
| "learning_rate": 2.3787198581400285e-06, | |
| "loss": 0.3679, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19800585508346558, | |
| "step": 2685, | |
| "valid_targets_mean": 3281.4, | |
| "valid_targets_min": 716 | |
| }, | |
| { | |
| "epoch": 4.304, | |
| "grad_norm": 0.6587761603032072, | |
| "learning_rate": 2.3261513859836437e-06, | |
| "loss": 0.3175, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2168753743171692, | |
| "step": 2690, | |
| "valid_targets_mean": 3552.4, | |
| "valid_targets_min": 1376 | |
| }, | |
| { | |
| "epoch": 4.312, | |
| "grad_norm": 0.5427138569442952, | |
| "learning_rate": 2.27413440570772e-06, | |
| "loss": 0.2975, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13280799984931946, | |
| "step": 2695, | |
| "valid_targets_mean": 2905.9, | |
| "valid_targets_min": 831 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "grad_norm": 0.4228524942163614, | |
| "learning_rate": 2.222670540441596e-06, | |
| "loss": 0.3268, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11188645660877228, | |
| "step": 2700, | |
| "valid_targets_mean": 3543.2, | |
| "valid_targets_min": 1280 | |
| }, | |
| { | |
| "epoch": 4.328, | |
| "grad_norm": 0.4899573286523899, | |
| "learning_rate": 2.17176139605531e-06, | |
| "loss": 0.3425, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2020570933818817, | |
| "step": 2705, | |
| "valid_targets_mean": 5368.1, | |
| "valid_targets_min": 704 | |
| }, | |
| { | |
| "epoch": 4.336, | |
| "grad_norm": 0.507636144967628, | |
| "learning_rate": 2.121408561109466e-06, | |
| "loss": 0.3147, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18471331894397736, | |
| "step": 2710, | |
| "valid_targets_mean": 5824.5, | |
| "valid_targets_min": 645 | |
| }, | |
| { | |
| "epoch": 4.344, | |
| "grad_norm": 0.3901707775687487, | |
| "learning_rate": 2.071613606805696e-06, | |
| "loss": 0.2986, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08599360287189484, | |
| "step": 2715, | |
| "valid_targets_mean": 3438.6, | |
| "valid_targets_min": 700 | |
| }, | |
| { | |
| "epoch": 4.352, | |
| "grad_norm": 0.5742155815605969, | |
| "learning_rate": 2.0223780869376018e-06, | |
| "loss": 0.3148, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15944108366966248, | |
| "step": 2720, | |
| "valid_targets_mean": 3927.5, | |
| "valid_targets_min": 1011 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "grad_norm": 0.419179061441119, | |
| "learning_rate": 1.9737035378422907e-06, | |
| "loss": 0.334, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14531302452087402, | |
| "step": 2725, | |
| "valid_targets_mean": 5146.2, | |
| "valid_targets_min": 1062 | |
| }, | |
| { | |
| "epoch": 4.368, | |
| "grad_norm": 0.5893213103013761, | |
| "learning_rate": 1.925591478352424e-06, | |
| "loss": 0.3056, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16566333174705505, | |
| "step": 2730, | |
| "valid_targets_mean": 4073.5, | |
| "valid_targets_min": 786 | |
| }, | |
| { | |
| "epoch": 4.376, | |
| "grad_norm": 0.5083568367696376, | |
| "learning_rate": 1.8780434097488443e-06, | |
| "loss": 0.3219, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11135862022638321, | |
| "step": 2735, | |
| "valid_targets_mean": 2977.8, | |
| "valid_targets_min": 740 | |
| }, | |
| { | |
| "epoch": 4.384, | |
| "grad_norm": 0.5970456690160377, | |
| "learning_rate": 1.831060815713699e-06, | |
| "loss": 0.3217, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15850530564785004, | |
| "step": 2740, | |
| "valid_targets_mean": 3731.6, | |
| "valid_targets_min": 738 | |
| }, | |
| { | |
| "epoch": 4.392, | |
| "grad_norm": 0.4352437971339595, | |
| "learning_rate": 1.7846451622841643e-06, | |
| "loss": 0.3126, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16310745477676392, | |
| "step": 2745, | |
| "valid_targets_mean": 4859.1, | |
| "valid_targets_min": 763 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "grad_norm": 0.7220488054568065, | |
| "learning_rate": 1.7387978978066988e-06, | |
| "loss": 0.3273, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12236481159925461, | |
| "step": 2750, | |
| "valid_targets_mean": 2560.9, | |
| "valid_targets_min": 833 | |
| }, | |
| { | |
| "epoch": 4.408, | |
| "grad_norm": 0.4482430441589336, | |
| "learning_rate": 1.6935204528918347e-06, | |
| "loss": 0.3229, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15221011638641357, | |
| "step": 2755, | |
| "valid_targets_mean": 4397.8, | |
| "valid_targets_min": 879 | |
| }, | |
| { | |
| "epoch": 4.416, | |
| "grad_norm": 0.6616631272031211, | |
| "learning_rate": 1.6488142403695651e-06, | |
| "loss": 0.3323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2810991704463959, | |
| "step": 2760, | |
| "valid_targets_mean": 4374.9, | |
| "valid_targets_min": 634 | |
| }, | |
| { | |
| "epoch": 4.424, | |
| "grad_norm": 0.6231637576743879, | |
| "learning_rate": 1.6046806552452254e-06, | |
| "loss": 0.3248, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1512073576450348, | |
| "step": 2765, | |
| "valid_targets_mean": 2162.0, | |
| "valid_targets_min": 717 | |
| }, | |
| { | |
| "epoch": 4.432, | |
| "grad_norm": 0.546121704100876, | |
| "learning_rate": 1.5611210746559868e-06, | |
| "loss": 0.3282, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1441936194896698, | |
| "step": 2770, | |
| "valid_targets_mean": 3542.0, | |
| "valid_targets_min": 916 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "grad_norm": 0.539850864305549, | |
| "learning_rate": 1.5181368578278744e-06, | |
| "loss": 0.3084, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14854803681373596, | |
| "step": 2775, | |
| "valid_targets_mean": 3258.1, | |
| "valid_targets_min": 1298 | |
| }, | |
| { | |
| "epoch": 4.448, | |
| "grad_norm": 0.5230541376311952, | |
| "learning_rate": 1.4757293460333566e-06, | |
| "loss": 0.3112, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14559073746204376, | |
| "step": 2780, | |
| "valid_targets_mean": 3888.8, | |
| "valid_targets_min": 896 | |
| }, | |
| { | |
| "epoch": 4.456, | |
| "grad_norm": 0.49941833844922867, | |
| "learning_rate": 1.4338998625494905e-06, | |
| "loss": 0.3233, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15301160514354706, | |
| "step": 2785, | |
| "valid_targets_mean": 4101.0, | |
| "valid_targets_min": 871 | |
| }, | |
| { | |
| "epoch": 4.464, | |
| "grad_norm": 0.5879287108121554, | |
| "learning_rate": 1.3926497126166405e-06, | |
| "loss": 0.3202, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16979511082172394, | |
| "step": 2790, | |
| "valid_targets_mean": 4361.5, | |
| "valid_targets_min": 843 | |
| }, | |
| { | |
| "epoch": 4.4719999999999995, | |
| "grad_norm": 0.491233445234694, | |
| "learning_rate": 1.3519801833977298e-06, | |
| "loss": 0.3089, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16678255796432495, | |
| "step": 2795, | |
| "valid_targets_mean": 4108.4, | |
| "valid_targets_min": 957 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "grad_norm": 0.4766886110365394, | |
| "learning_rate": 1.3118925439381003e-06, | |
| "loss": 0.2904, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12528201937675476, | |
| "step": 2800, | |
| "valid_targets_mean": 3777.0, | |
| "valid_targets_min": 1264 | |
| }, | |
| { | |
| "epoch": 4.4879999999999995, | |
| "grad_norm": 0.47462065492059774, | |
| "learning_rate": 1.2723880451258918e-06, | |
| "loss": 0.3051, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17402642965316772, | |
| "step": 2805, | |
| "valid_targets_mean": 6122.8, | |
| "valid_targets_min": 3206 | |
| }, | |
| { | |
| "epoch": 4.496, | |
| "grad_norm": 0.4937259588561042, | |
| "learning_rate": 1.2334679196530219e-06, | |
| "loss": 0.3576, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16844210028648376, | |
| "step": 2810, | |
| "valid_targets_mean": 5449.5, | |
| "valid_targets_min": 887 | |
| }, | |
| { | |
| "epoch": 4.504, | |
| "grad_norm": 0.5195655183538852, | |
| "learning_rate": 1.1951333819767163e-06, | |
| "loss": 0.3071, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1690118908882141, | |
| "step": 2815, | |
| "valid_targets_mean": 4369.6, | |
| "valid_targets_min": 721 | |
| }, | |
| { | |
| "epoch": 4.5120000000000005, | |
| "grad_norm": 0.645008699848017, | |
| "learning_rate": 1.157385628281622e-06, | |
| "loss": 0.3122, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16069874167442322, | |
| "step": 2820, | |
| "valid_targets_mean": 2885.9, | |
| "valid_targets_min": 889 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "grad_norm": 0.46142203647435265, | |
| "learning_rate": 1.1202258364424633e-06, | |
| "loss": 0.281, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10793192684650421, | |
| "step": 2825, | |
| "valid_targets_mean": 4069.0, | |
| "valid_targets_min": 757 | |
| }, | |
| { | |
| "epoch": 4.5280000000000005, | |
| "grad_norm": 0.5591910759629778, | |
| "learning_rate": 1.0836551659873073e-06, | |
| "loss": 0.312, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1380782425403595, | |
| "step": 2830, | |
| "valid_targets_mean": 3617.9, | |
| "valid_targets_min": 661 | |
| }, | |
| { | |
| "epoch": 4.536, | |
| "grad_norm": 0.5024384662282567, | |
| "learning_rate": 1.0476747580613723e-06, | |
| "loss": 0.3243, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14395056664943695, | |
| "step": 2835, | |
| "valid_targets_mean": 4670.5, | |
| "valid_targets_min": 895 | |
| }, | |
| { | |
| "epoch": 4.5440000000000005, | |
| "grad_norm": 0.4960916051732461, | |
| "learning_rate": 1.012285735391416e-06, | |
| "loss": 0.3108, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1764678955078125, | |
| "step": 2840, | |
| "valid_targets_mean": 4985.9, | |
| "valid_targets_min": 404 | |
| }, | |
| { | |
| "epoch": 4.552, | |
| "grad_norm": 0.5442146212646635, | |
| "learning_rate": 9.774892022507166e-07, | |
| "loss": 0.319, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13719762861728668, | |
| "step": 2845, | |
| "valid_targets_mean": 3678.2, | |
| "valid_targets_min": 1021 | |
| }, | |
| { | |
| "epoch": 4.5600000000000005, | |
| "grad_norm": 0.5874551803498684, | |
| "learning_rate": 9.432862444245994e-07, | |
| "loss": 0.3123, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1982530653476715, | |
| "step": 2850, | |
| "valid_targets_mean": 3730.6, | |
| "valid_targets_min": 800 | |
| }, | |
| { | |
| "epoch": 4.568, | |
| "grad_norm": 0.5392307118781249, | |
| "learning_rate": 9.096779291765667e-07, | |
| "loss": 0.3304, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21671076118946075, | |
| "step": 2855, | |
| "valid_targets_mean": 5178.0, | |
| "valid_targets_min": 1265 | |
| }, | |
| { | |
| "epoch": 4.576, | |
| "grad_norm": 0.6120562795608593, | |
| "learning_rate": 8.766653052149831e-07, | |
| "loss": 0.3354, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20868843793869019, | |
| "step": 2860, | |
| "valid_targets_mean": 3738.2, | |
| "valid_targets_min": 1599 | |
| }, | |
| { | |
| "epoch": 4.584, | |
| "grad_norm": 0.7117821394769693, | |
| "learning_rate": 8.442494026603709e-07, | |
| "loss": 0.3398, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16895297169685364, | |
| "step": 2865, | |
| "valid_targets_mean": 2438.6, | |
| "valid_targets_min": 943 | |
| }, | |
| { | |
| "epoch": 4.592, | |
| "grad_norm": 0.6622067516789324, | |
| "learning_rate": 8.124312330132423e-07, | |
| "loss": 0.3267, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11896079778671265, | |
| "step": 2870, | |
| "valid_targets_mean": 2087.6, | |
| "valid_targets_min": 868 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "grad_norm": 0.5844121637950128, | |
| "learning_rate": 7.812117891225667e-07, | |
| "loss": 0.2815, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15796557068824768, | |
| "step": 2875, | |
| "valid_targets_mean": 3266.6, | |
| "valid_targets_min": 1136 | |
| }, | |
| { | |
| "epoch": 4.608, | |
| "grad_norm": 0.6396259105660784, | |
| "learning_rate": 7.505920451547544e-07, | |
| "loss": 0.2991, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.193174809217453, | |
| "step": 2880, | |
| "valid_targets_mean": 4270.2, | |
| "valid_targets_min": 972 | |
| }, | |
| { | |
| "epoch": 4.616, | |
| "grad_norm": 0.45054830264551554, | |
| "learning_rate": 7.205729565632947e-07, | |
| "loss": 0.3064, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11880641430616379, | |
| "step": 2885, | |
| "valid_targets_mean": 2952.6, | |
| "valid_targets_min": 1052 | |
| }, | |
| { | |
| "epoch": 4.624, | |
| "grad_norm": 0.5867839344286957, | |
| "learning_rate": 6.911554600589121e-07, | |
| "loss": 0.3213, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1453874111175537, | |
| "step": 2890, | |
| "valid_targets_mean": 3303.1, | |
| "valid_targets_min": 944 | |
| }, | |
| { | |
| "epoch": 4.632, | |
| "grad_norm": 0.66861340671234, | |
| "learning_rate": 6.62340473580354e-07, | |
| "loss": 0.3191, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14077623188495636, | |
| "step": 2895, | |
| "valid_targets_mean": 2540.4, | |
| "valid_targets_min": 1033 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "grad_norm": 0.5948561509092989, | |
| "learning_rate": 6.341288962657422e-07, | |
| "loss": 0.3266, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11293820291757584, | |
| "step": 2900, | |
| "valid_targets_mean": 2276.4, | |
| "valid_targets_min": 1000 | |
| }, | |
| { | |
| "epoch": 4.648, | |
| "grad_norm": 0.4492558783161307, | |
| "learning_rate": 6.06521608424524e-07, | |
| "loss": 0.3436, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11917877197265625, | |
| "step": 2905, | |
| "valid_targets_mean": 2935.6, | |
| "valid_targets_min": 697 | |
| }, | |
| { | |
| "epoch": 4.656, | |
| "grad_norm": 0.5260049148715912, | |
| "learning_rate": 5.795194715099905e-07, | |
| "loss": 0.3103, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11105750501155853, | |
| "step": 2910, | |
| "valid_targets_mean": 2840.0, | |
| "valid_targets_min": 847 | |
| }, | |
| { | |
| "epoch": 4.664, | |
| "grad_norm": 0.6187219070281177, | |
| "learning_rate": 5.531233280924042e-07, | |
| "loss": 0.3064, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18402305245399475, | |
| "step": 2915, | |
| "valid_targets_mean": 3376.0, | |
| "valid_targets_min": 899 | |
| }, | |
| { | |
| "epoch": 4.672, | |
| "grad_norm": 0.6718889715232024, | |
| "learning_rate": 5.273340018327044e-07, | |
| "loss": 0.3303, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19413933157920837, | |
| "step": 2920, | |
| "valid_targets_mean": 2698.1, | |
| "valid_targets_min": 727 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "grad_norm": 0.6687495999234568, | |
| "learning_rate": 5.02152297456806e-07, | |
| "loss": 0.3323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19689956307411194, | |
| "step": 2925, | |
| "valid_targets_mean": 2559.8, | |
| "valid_targets_min": 772 | |
| }, | |
| { | |
| "epoch": 4.688, | |
| "grad_norm": 0.5256698425754467, | |
| "learning_rate": 4.775790007304993e-07, | |
| "loss": 0.3013, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24560466408729553, | |
| "step": 2930, | |
| "valid_targets_mean": 6107.5, | |
| "valid_targets_min": 972 | |
| }, | |
| { | |
| "epoch": 4.696, | |
| "grad_norm": 0.5425974097527478, | |
| "learning_rate": 4.5361487843490924e-07, | |
| "loss": 0.293, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14793461561203003, | |
| "step": 2935, | |
| "valid_targets_mean": 3443.0, | |
| "valid_targets_min": 678 | |
| }, | |
| { | |
| "epoch": 4.704, | |
| "grad_norm": 0.4596549583922256, | |
| "learning_rate": 4.3026067834258667e-07, | |
| "loss": 0.299, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16168315708637238, | |
| "step": 2940, | |
| "valid_targets_mean": 7192.1, | |
| "valid_targets_min": 1895 | |
| }, | |
| { | |
| "epoch": 4.712, | |
| "grad_norm": 0.52608460848762, | |
| "learning_rate": 4.0751712919417484e-07, | |
| "loss": 0.3069, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20969341695308685, | |
| "step": 2945, | |
| "valid_targets_mean": 4731.2, | |
| "valid_targets_min": 736 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "grad_norm": 0.5530590908411556, | |
| "learning_rate": 3.853849406756549e-07, | |
| "loss": 0.2896, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1489768624305725, | |
| "step": 2950, | |
| "valid_targets_mean": 3561.1, | |
| "valid_targets_min": 1047 | |
| }, | |
| { | |
| "epoch": 4.728, | |
| "grad_norm": 0.6253292073895866, | |
| "learning_rate": 3.6386480339621886e-07, | |
| "loss": 0.3121, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15937970578670502, | |
| "step": 2955, | |
| "valid_targets_mean": 3324.0, | |
| "valid_targets_min": 1209 | |
| }, | |
| { | |
| "epoch": 4.736, | |
| "grad_norm": 0.4383199488774124, | |
| "learning_rate": 3.4295738886670925e-07, | |
| "loss": 0.2963, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12093428522348404, | |
| "step": 2960, | |
| "valid_targets_mean": 4741.6, | |
| "valid_targets_min": 901 | |
| }, | |
| { | |
| "epoch": 4.744, | |
| "grad_norm": 0.5811119572995426, | |
| "learning_rate": 3.226633494786668e-07, | |
| "loss": 0.3017, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11559713631868362, | |
| "step": 2965, | |
| "valid_targets_mean": 4799.9, | |
| "valid_targets_min": 775 | |
| }, | |
| { | |
| "epoch": 4.752, | |
| "grad_norm": 0.702215418406445, | |
| "learning_rate": 3.0298331848398033e-07, | |
| "loss": 0.3151, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1337815821170807, | |
| "step": 2970, | |
| "valid_targets_mean": 2866.6, | |
| "valid_targets_min": 865 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "grad_norm": 0.42111483300594843, | |
| "learning_rate": 2.839179099751133e-07, | |
| "loss": 0.3159, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09750033915042877, | |
| "step": 2975, | |
| "valid_targets_mean": 3362.2, | |
| "valid_targets_min": 909 | |
| }, | |
| { | |
| "epoch": 4.768, | |
| "grad_norm": 0.6679073549967152, | |
| "learning_rate": 2.654677188659549e-07, | |
| "loss": 0.2942, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24244052171707153, | |
| "step": 2980, | |
| "valid_targets_mean": 4093.0, | |
| "valid_targets_min": 734 | |
| }, | |
| { | |
| "epoch": 4.776, | |
| "grad_norm": 0.5819245393210193, | |
| "learning_rate": 2.476333208732462e-07, | |
| "loss": 0.3427, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19281116127967834, | |
| "step": 2985, | |
| "valid_targets_mean": 3587.6, | |
| "valid_targets_min": 808 | |
| }, | |
| { | |
| "epoch": 4.784, | |
| "grad_norm": 0.6047753681807744, | |
| "learning_rate": 2.3041527249863193e-07, | |
| "loss": 0.3198, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18199463188648224, | |
| "step": 2990, | |
| "valid_targets_mean": 4387.0, | |
| "valid_targets_min": 764 | |
| }, | |
| { | |
| "epoch": 4.792, | |
| "grad_norm": 0.6613784930971137, | |
| "learning_rate": 2.1381411101127013e-07, | |
| "loss": 0.2938, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.132035493850708, | |
| "step": 2995, | |
| "valid_targets_mean": 2644.2, | |
| "valid_targets_min": 687 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "grad_norm": 0.5682147375927875, | |
| "learning_rate": 1.9783035443108999e-07, | |
| "loss": 0.2967, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1099662259221077, | |
| "step": 3000, | |
| "valid_targets_mean": 2433.6, | |
| "valid_targets_min": 957 | |
| }, | |
| { | |
| "epoch": 4.808, | |
| "grad_norm": 0.4089149079258835, | |
| "learning_rate": 1.8246450151261362e-07, | |
| "loss": 0.3093, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1526375263929367, | |
| "step": 3005, | |
| "valid_targets_mean": 7613.1, | |
| "valid_targets_min": 791 | |
| }, | |
| { | |
| "epoch": 4.816, | |
| "grad_norm": 0.55810969874875, | |
| "learning_rate": 1.6771703172940635e-07, | |
| "loss": 0.3096, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1753598004579544, | |
| "step": 3010, | |
| "valid_targets_mean": 4165.6, | |
| "valid_targets_min": 794 | |
| }, | |
| { | |
| "epoch": 4.824, | |
| "grad_norm": 0.45869761806907694, | |
| "learning_rate": 1.5358840525909967e-07, | |
| "loss": 0.3151, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12356032431125641, | |
| "step": 3015, | |
| "valid_targets_mean": 4902.8, | |
| "valid_targets_min": 934 | |
| }, | |
| { | |
| "epoch": 4.832, | |
| "grad_norm": 0.5650404498859692, | |
| "learning_rate": 1.4007906296904072e-07, | |
| "loss": 0.3171, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13098034262657166, | |
| "step": 3020, | |
| "valid_targets_mean": 3383.9, | |
| "valid_targets_min": 854 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "grad_norm": 0.5184302552337475, | |
| "learning_rate": 1.2718942640254084e-07, | |
| "loss": 0.3097, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16631071269512177, | |
| "step": 3025, | |
| "valid_targets_mean": 3698.4, | |
| "valid_targets_min": 968 | |
| }, | |
| { | |
| "epoch": 4.848, | |
| "grad_norm": 0.479932423772461, | |
| "learning_rate": 1.1491989776570623e-07, | |
| "loss": 0.3022, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15011048316955566, | |
| "step": 3030, | |
| "valid_targets_mean": 4314.4, | |
| "valid_targets_min": 1056 | |
| }, | |
| { | |
| "epoch": 4.856, | |
| "grad_norm": 0.5789239779860111, | |
| "learning_rate": 1.0327085991490127e-07, | |
| "loss": 0.3391, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11871884763240814, | |
| "step": 3035, | |
| "valid_targets_mean": 2391.0, | |
| "valid_targets_min": 710 | |
| }, | |
| { | |
| "epoch": 4.864, | |
| "grad_norm": 0.5899999965939208, | |
| "learning_rate": 9.22426763447981e-08, | |
| "loss": 0.3145, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13772061467170715, | |
| "step": 3040, | |
| "valid_targets_mean": 2684.2, | |
| "valid_targets_min": 844 | |
| }, | |
| { | |
| "epoch": 4.872, | |
| "grad_norm": 0.5413636294872867, | |
| "learning_rate": 8.183569117703461e-08, | |
| "loss": 0.3173, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1497182846069336, | |
| "step": 3045, | |
| "valid_targets_mean": 3171.4, | |
| "valid_targets_min": 1127 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "grad_norm": 0.5102986675669349, | |
| "learning_rate": 7.205022914946957e-08, | |
| "loss": 0.3454, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20103488862514496, | |
| "step": 3050, | |
| "valid_targets_mean": 5160.4, | |
| "valid_targets_min": 936 | |
| }, | |
| { | |
| "epoch": 4.888, | |
| "grad_norm": 0.5687408463816219, | |
| "learning_rate": 6.288659560606203e-08, | |
| "loss": 0.3219, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1383986473083496, | |
| "step": 3055, | |
| "valid_targets_mean": 3382.1, | |
| "valid_targets_min": 1005 | |
| }, | |
| { | |
| "epoch": 4.896, | |
| "grad_norm": 0.4996884419003454, | |
| "learning_rate": 5.4345076487332114e-08, | |
| "loss": 0.3192, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18319477140903473, | |
| "step": 3060, | |
| "valid_targets_mean": 4995.0, | |
| "valid_targets_min": 1134 | |
| }, | |
| { | |
| "epoch": 4.904, | |
| "grad_norm": 0.465356601725948, | |
| "learning_rate": 4.642593832144382e-08, | |
| "loss": 0.2856, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1306002140045166, | |
| "step": 3065, | |
| "valid_targets_mean": 4357.2, | |
| "valid_targets_min": 667 | |
| }, | |
| { | |
| "epoch": 4.912, | |
| "grad_norm": 0.6879235684017253, | |
| "learning_rate": 3.912942821589161e-08, | |
| "loss": 0.2827, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1553347408771515, | |
| "step": 3070, | |
| "valid_targets_mean": 5430.6, | |
| "valid_targets_min": 2537 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "grad_norm": 0.6393549752658132, | |
| "learning_rate": 3.2455773849779935e-08, | |
| "loss": 0.3238, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21189448237419128, | |
| "step": 3075, | |
| "valid_targets_mean": 3572.1, | |
| "valid_targets_min": 1467 | |
| }, | |
| { | |
| "epoch": 4.928, | |
| "grad_norm": 0.4888826335889139, | |
| "learning_rate": 2.6405183466731154e-08, | |
| "loss": 0.3143, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11773912608623505, | |
| "step": 3080, | |
| "valid_targets_mean": 3744.6, | |
| "valid_targets_min": 1223 | |
| }, | |
| { | |
| "epoch": 4.936, | |
| "grad_norm": 0.5646847598387272, | |
| "learning_rate": 2.0977845868375145e-08, | |
| "loss": 0.2988, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1856897473335266, | |
| "step": 3085, | |
| "valid_targets_mean": 3780.4, | |
| "valid_targets_min": 613 | |
| }, | |
| { | |
| "epoch": 4.944, | |
| "grad_norm": 0.4228284310980491, | |
| "learning_rate": 1.6173930408467376e-08, | |
| "loss": 0.3228, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14991110563278198, | |
| "step": 3090, | |
| "valid_targets_mean": 5997.0, | |
| "valid_targets_min": 817 | |
| }, | |
| { | |
| "epoch": 4.952, | |
| "grad_norm": 0.46279632136180054, | |
| "learning_rate": 1.199358698759978e-08, | |
| "loss": 0.2987, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15612635016441345, | |
| "step": 3095, | |
| "valid_targets_mean": 5848.9, | |
| "valid_targets_min": 800 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "grad_norm": 0.5214714566840865, | |
| "learning_rate": 8.436946048522298e-09, | |
| "loss": 0.3246, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20194128155708313, | |
| "step": 3100, | |
| "valid_targets_mean": 4828.2, | |
| "valid_targets_min": 3631 | |
| }, | |
| { | |
| "epoch": 4.968, | |
| "grad_norm": 0.47269884538929174, | |
| "learning_rate": 5.504118572081662e-09, | |
| "loss": 0.3072, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1507304161787033, | |
| "step": 3105, | |
| "valid_targets_mean": 4550.4, | |
| "valid_targets_min": 779 | |
| }, | |
| { | |
| "epoch": 4.976, | |
| "grad_norm": 0.48876143808029926, | |
| "learning_rate": 3.1951960737419686e-09, | |
| "loss": 0.305, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2046237587928772, | |
| "step": 3110, | |
| "valid_targets_mean": 6304.9, | |
| "valid_targets_min": 1809 | |
| }, | |
| { | |
| "epoch": 4.984, | |
| "grad_norm": 0.5173224903959538, | |
| "learning_rate": 1.5102506007447227e-09, | |
| "loss": 0.3073, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11847519129514694, | |
| "step": 3115, | |
| "valid_targets_mean": 2805.1, | |
| "valid_targets_min": 1730 | |
| }, | |
| { | |
| "epoch": 4.992, | |
| "grad_norm": 0.5730468668213601, | |
| "learning_rate": 4.493347298528683e-10, | |
| "loss": 0.3402, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1377507597208023, | |
| "step": 3120, | |
| "valid_targets_mean": 3388.4, | |
| "valid_targets_min": 1106 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.5459769103106156, | |
| "learning_rate": 1.248156571209691e-11, | |
| "loss": 0.296, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1909605711698532, | |
| "step": 3125, | |
| "valid_targets_mean": 3954.1, | |
| "valid_targets_min": 1068 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1909605711698532, | |
| "step": 3125, | |
| "total_flos": 7.266001898207969e+17, | |
| "train_loss": 0.3592725233459473, | |
| "train_runtime": 30483.9243, | |
| "train_samples_per_second": 1.64, | |
| "train_steps_per_second": 0.103, | |
| "valid_targets_mean": 3954.1, | |
| "valid_targets_min": 1068 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 3125, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 7.266001898207969e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |