{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 6.0, "eval_steps": 500, "global_step": 3750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008, "grad_norm": 1.1123269235502846, "learning_rate": 4.266666666666667e-07, "loss": 0.5305, "loss_nan_ranks": 0, "loss_rank_avg": 0.5401487350463867, "step": 5, "valid_targets_mean": 2568.2, "valid_targets_min": 1832 }, { "epoch": 0.016, "grad_norm": 1.0307736286257203, "learning_rate": 9.600000000000001e-07, "loss": 0.5413, "loss_nan_ranks": 0, "loss_rank_avg": 0.518206000328064, "step": 10, "valid_targets_mean": 2551.6, "valid_targets_min": 594 }, { "epoch": 0.024, "grad_norm": 1.1513067813597742, "learning_rate": 1.4933333333333336e-06, "loss": 0.5505, "loss_nan_ranks": 0, "loss_rank_avg": 0.5674519538879395, "step": 15, "valid_targets_mean": 2249.8, "valid_targets_min": 1202 }, { "epoch": 0.032, "grad_norm": 1.0885622535090271, "learning_rate": 2.0266666666666666e-06, "loss": 0.5291, "loss_nan_ranks": 0, "loss_rank_avg": 0.540584921836853, "step": 20, "valid_targets_mean": 2476.6, "valid_targets_min": 1301 }, { "epoch": 0.04, "grad_norm": 0.9309407859608781, "learning_rate": 2.56e-06, "loss": 0.5469, "loss_nan_ranks": 0, "loss_rank_avg": 0.5405070185661316, "step": 25, "valid_targets_mean": 2325.6, "valid_targets_min": 560 }, { "epoch": 0.048, "grad_norm": 0.8653574810736101, "learning_rate": 3.093333333333334e-06, "loss": 0.5578, "loss_nan_ranks": 0, "loss_rank_avg": 0.5472347140312195, "step": 30, "valid_targets_mean": 2225.7, "valid_targets_min": 689 }, { "epoch": 0.056, "grad_norm": 0.7895777368811453, "learning_rate": 3.6266666666666674e-06, "loss": 0.5594, "loss_nan_ranks": 0, "loss_rank_avg": 0.5670331716537476, "step": 35, "valid_targets_mean": 2279.0, "valid_targets_min": 643 }, { "epoch": 0.064, "grad_norm": 0.7760634586844122, "learning_rate": 4.16e-06, "loss": 0.526, "loss_nan_ranks": 0, "loss_rank_avg": 0.5177326202392578, "step": 40, "valid_targets_mean": 2332.7, "valid_targets_min": 1072 }, { "epoch": 0.072, "grad_norm": 0.6939896610202841, "learning_rate": 4.693333333333334e-06, "loss": 0.5419, "loss_nan_ranks": 0, "loss_rank_avg": 0.522503137588501, "step": 45, "valid_targets_mean": 2649.1, "valid_targets_min": 1594 }, { "epoch": 0.08, "grad_norm": 0.6701763260463834, "learning_rate": 5.226666666666667e-06, "loss": 0.51, "loss_nan_ranks": 0, "loss_rank_avg": 0.4922634959220886, "step": 50, "valid_targets_mean": 2683.0, "valid_targets_min": 1193 }, { "epoch": 0.088, "grad_norm": 0.8048346477254813, "learning_rate": 5.76e-06, "loss": 0.5208, "loss_nan_ranks": 0, "loss_rank_avg": 0.5240609049797058, "step": 55, "valid_targets_mean": 2272.8, "valid_targets_min": 786 }, { "epoch": 0.096, "grad_norm": 0.65782981166933, "learning_rate": 6.293333333333334e-06, "loss": 0.5392, "loss_nan_ranks": 0, "loss_rank_avg": 0.5241539478302002, "step": 60, "valid_targets_mean": 2635.2, "valid_targets_min": 2017 }, { "epoch": 0.104, "grad_norm": 0.8692611396107511, "learning_rate": 6.826666666666667e-06, "loss": 0.5202, "loss_nan_ranks": 0, "loss_rank_avg": 0.5587050914764404, "step": 65, "valid_targets_mean": 2095.1, "valid_targets_min": 642 }, { "epoch": 0.112, "grad_norm": 0.7327308095270739, "learning_rate": 7.360000000000001e-06, "loss": 0.5126, "loss_nan_ranks": 0, "loss_rank_avg": 0.5429940819740295, "step": 70, "valid_targets_mean": 2413.7, "valid_targets_min": 809 }, { "epoch": 0.12, "grad_norm": 0.7114841665616957, "learning_rate": 7.893333333333335e-06, "loss": 0.5388, "loss_nan_ranks": 0, "loss_rank_avg": 0.5138970017433167, "step": 75, "valid_targets_mean": 2285.8, "valid_targets_min": 654 }, { "epoch": 0.128, "grad_norm": 0.7450057042477, "learning_rate": 8.426666666666667e-06, "loss": 0.5241, "loss_nan_ranks": 0, "loss_rank_avg": 0.522254228591919, "step": 80, "valid_targets_mean": 2392.8, "valid_targets_min": 834 }, { "epoch": 0.136, "grad_norm": 0.6646870780143527, "learning_rate": 8.96e-06, "loss": 0.5321, "loss_nan_ranks": 0, "loss_rank_avg": 0.4835280776023865, "step": 85, "valid_targets_mean": 2408.1, "valid_targets_min": 863 }, { "epoch": 0.144, "grad_norm": 0.7685295225223336, "learning_rate": 9.493333333333334e-06, "loss": 0.4985, "loss_nan_ranks": 0, "loss_rank_avg": 0.5568891763687134, "step": 90, "valid_targets_mean": 2293.4, "valid_targets_min": 863 }, { "epoch": 0.152, "grad_norm": 0.757668913170068, "learning_rate": 1.0026666666666667e-05, "loss": 0.489, "loss_nan_ranks": 0, "loss_rank_avg": 0.49190622568130493, "step": 95, "valid_targets_mean": 1997.3, "valid_targets_min": 598 }, { "epoch": 0.16, "grad_norm": 0.8812780405653656, "learning_rate": 1.056e-05, "loss": 0.5115, "loss_nan_ranks": 0, "loss_rank_avg": 0.5440094470977783, "step": 100, "valid_targets_mean": 2036.3, "valid_targets_min": 627 }, { "epoch": 0.168, "grad_norm": 0.7100510683842957, "learning_rate": 1.1093333333333334e-05, "loss": 0.5021, "loss_nan_ranks": 0, "loss_rank_avg": 0.4753778576850891, "step": 105, "valid_targets_mean": 2323.8, "valid_targets_min": 591 }, { "epoch": 0.176, "grad_norm": 0.6512988020949609, "learning_rate": 1.1626666666666668e-05, "loss": 0.5164, "loss_nan_ranks": 0, "loss_rank_avg": 0.49330976605415344, "step": 110, "valid_targets_mean": 2537.6, "valid_targets_min": 581 }, { "epoch": 0.184, "grad_norm": 0.7639238062022932, "learning_rate": 1.216e-05, "loss": 0.5252, "loss_nan_ranks": 0, "loss_rank_avg": 0.5586614608764648, "step": 115, "valid_targets_mean": 2337.1, "valid_targets_min": 882 }, { "epoch": 0.192, "grad_norm": 0.7575138893965674, "learning_rate": 1.2693333333333336e-05, "loss": 0.5178, "loss_nan_ranks": 0, "loss_rank_avg": 0.5260697603225708, "step": 120, "valid_targets_mean": 2170.1, "valid_targets_min": 695 }, { "epoch": 0.2, "grad_norm": 0.702449713567109, "learning_rate": 1.3226666666666668e-05, "loss": 0.5188, "loss_nan_ranks": 0, "loss_rank_avg": 0.5398459434509277, "step": 125, "valid_targets_mean": 2577.2, "valid_targets_min": 629 }, { "epoch": 0.208, "grad_norm": 0.7808788433928092, "learning_rate": 1.376e-05, "loss": 0.5113, "loss_nan_ranks": 0, "loss_rank_avg": 0.5482888221740723, "step": 130, "valid_targets_mean": 1933.8, "valid_targets_min": 852 }, { "epoch": 0.216, "grad_norm": 0.7056899770108914, "learning_rate": 1.4293333333333334e-05, "loss": 0.4977, "loss_nan_ranks": 0, "loss_rank_avg": 0.4935382008552551, "step": 135, "valid_targets_mean": 2204.5, "valid_targets_min": 584 }, { "epoch": 0.224, "grad_norm": 0.7966879290545987, "learning_rate": 1.4826666666666666e-05, "loss": 0.4848, "loss_nan_ranks": 0, "loss_rank_avg": 0.5101238489151001, "step": 140, "valid_targets_mean": 2561.9, "valid_targets_min": 1349 }, { "epoch": 0.232, "grad_norm": 0.7943913824299708, "learning_rate": 1.5360000000000002e-05, "loss": 0.5185, "loss_nan_ranks": 0, "loss_rank_avg": 0.5645924806594849, "step": 145, "valid_targets_mean": 1990.6, "valid_targets_min": 578 }, { "epoch": 0.24, "grad_norm": 1.0993272339263302, "learning_rate": 1.5893333333333333e-05, "loss": 0.5205, "loss_nan_ranks": 0, "loss_rank_avg": 0.5444607734680176, "step": 150, "valid_targets_mean": 2103.9, "valid_targets_min": 522 }, { "epoch": 0.248, "grad_norm": 0.7214471657235726, "learning_rate": 1.642666666666667e-05, "loss": 0.4914, "loss_nan_ranks": 0, "loss_rank_avg": 0.48658621311187744, "step": 155, "valid_targets_mean": 2252.8, "valid_targets_min": 506 }, { "epoch": 0.256, "grad_norm": 0.647860810097823, "learning_rate": 1.696e-05, "loss": 0.4897, "loss_nan_ranks": 0, "loss_rank_avg": 0.4415959417819977, "step": 160, "valid_targets_mean": 2587.4, "valid_targets_min": 1156 }, { "epoch": 0.264, "grad_norm": 0.7996121351509609, "learning_rate": 1.7493333333333334e-05, "loss": 0.4974, "loss_nan_ranks": 0, "loss_rank_avg": 0.5080252885818481, "step": 165, "valid_targets_mean": 2078.8, "valid_targets_min": 894 }, { "epoch": 0.272, "grad_norm": 0.7839710134675049, "learning_rate": 1.8026666666666668e-05, "loss": 0.5054, "loss_nan_ranks": 0, "loss_rank_avg": 0.5535873174667358, "step": 170, "valid_targets_mean": 2108.3, "valid_targets_min": 956 }, { "epoch": 0.28, "grad_norm": 0.7271134989631454, "learning_rate": 1.8560000000000002e-05, "loss": 0.4994, "loss_nan_ranks": 0, "loss_rank_avg": 0.5392011404037476, "step": 175, "valid_targets_mean": 2380.4, "valid_targets_min": 523 }, { "epoch": 0.288, "grad_norm": 0.7438261523269221, "learning_rate": 1.9093333333333336e-05, "loss": 0.4563, "loss_nan_ranks": 0, "loss_rank_avg": 0.4989452362060547, "step": 180, "valid_targets_mean": 2122.2, "valid_targets_min": 505 }, { "epoch": 0.296, "grad_norm": 0.7087097494439891, "learning_rate": 1.9626666666666666e-05, "loss": 0.4907, "loss_nan_ranks": 0, "loss_rank_avg": 0.440179705619812, "step": 185, "valid_targets_mean": 2344.2, "valid_targets_min": 677 }, { "epoch": 0.304, "grad_norm": 0.7922339174945309, "learning_rate": 2.016e-05, "loss": 0.5109, "loss_nan_ranks": 0, "loss_rank_avg": 0.5441197752952576, "step": 190, "valid_targets_mean": 2192.1, "valid_targets_min": 674 }, { "epoch": 0.312, "grad_norm": 0.8719375736319943, "learning_rate": 2.0693333333333334e-05, "loss": 0.4719, "loss_nan_ranks": 0, "loss_rank_avg": 0.4866606593132019, "step": 195, "valid_targets_mean": 2093.7, "valid_targets_min": 720 }, { "epoch": 0.32, "grad_norm": 0.7147407436139384, "learning_rate": 2.1226666666666668e-05, "loss": 0.5033, "loss_nan_ranks": 0, "loss_rank_avg": 0.48567086458206177, "step": 200, "valid_targets_mean": 2235.4, "valid_targets_min": 891 }, { "epoch": 0.328, "grad_norm": 0.6645224606175668, "learning_rate": 2.1760000000000002e-05, "loss": 0.4713, "loss_nan_ranks": 0, "loss_rank_avg": 0.45803600549697876, "step": 205, "valid_targets_mean": 2461.2, "valid_targets_min": 767 }, { "epoch": 0.336, "grad_norm": 0.6835201565681405, "learning_rate": 2.2293333333333336e-05, "loss": 0.4826, "loss_nan_ranks": 0, "loss_rank_avg": 0.49388882517814636, "step": 210, "valid_targets_mean": 2384.2, "valid_targets_min": 741 }, { "epoch": 0.344, "grad_norm": 0.7571298087512041, "learning_rate": 2.282666666666667e-05, "loss": 0.5161, "loss_nan_ranks": 0, "loss_rank_avg": 0.5247617959976196, "step": 215, "valid_targets_mean": 2137.7, "valid_targets_min": 803 }, { "epoch": 0.352, "grad_norm": 0.7493305637197739, "learning_rate": 2.336e-05, "loss": 0.4818, "loss_nan_ranks": 0, "loss_rank_avg": 0.4521583914756775, "step": 220, "valid_targets_mean": 2493.7, "valid_targets_min": 1072 }, { "epoch": 0.36, "grad_norm": 0.6302236122957066, "learning_rate": 2.3893333333333337e-05, "loss": 0.4597, "loss_nan_ranks": 0, "loss_rank_avg": 0.4275933504104614, "step": 225, "valid_targets_mean": 2858.3, "valid_targets_min": 1495 }, { "epoch": 0.368, "grad_norm": 0.7547426741735809, "learning_rate": 2.442666666666667e-05, "loss": 0.5046, "loss_nan_ranks": 0, "loss_rank_avg": 0.49721017479896545, "step": 230, "valid_targets_mean": 2059.5, "valid_targets_min": 804 }, { "epoch": 0.376, "grad_norm": 0.8593657667258695, "learning_rate": 2.496e-05, "loss": 0.4785, "loss_nan_ranks": 0, "loss_rank_avg": 0.5257455110549927, "step": 235, "valid_targets_mean": 2286.9, "valid_targets_min": 1010 }, { "epoch": 0.384, "grad_norm": 0.6840458587232336, "learning_rate": 2.5493333333333335e-05, "loss": 0.478, "loss_nan_ranks": 0, "loss_rank_avg": 0.47672101855278015, "step": 240, "valid_targets_mean": 2516.1, "valid_targets_min": 748 }, { "epoch": 0.392, "grad_norm": 0.7701222695014215, "learning_rate": 2.6026666666666666e-05, "loss": 0.5012, "loss_nan_ranks": 0, "loss_rank_avg": 0.5424163341522217, "step": 245, "valid_targets_mean": 2096.6, "valid_targets_min": 938 }, { "epoch": 0.4, "grad_norm": 0.8091486942145641, "learning_rate": 2.6560000000000003e-05, "loss": 0.4936, "loss_nan_ranks": 0, "loss_rank_avg": 0.5072291493415833, "step": 250, "valid_targets_mean": 2084.3, "valid_targets_min": 588 }, { "epoch": 0.408, "grad_norm": 0.6898982048013277, "learning_rate": 2.7093333333333337e-05, "loss": 0.4968, "loss_nan_ranks": 0, "loss_rank_avg": 0.4925045967102051, "step": 255, "valid_targets_mean": 2633.6, "valid_targets_min": 777 }, { "epoch": 0.416, "grad_norm": 0.8251422552999986, "learning_rate": 2.7626666666666668e-05, "loss": 0.5298, "loss_nan_ranks": 0, "loss_rank_avg": 0.5059669017791748, "step": 260, "valid_targets_mean": 2005.4, "valid_targets_min": 666 }, { "epoch": 0.424, "grad_norm": 0.7431003149844874, "learning_rate": 2.816e-05, "loss": 0.4875, "loss_nan_ranks": 0, "loss_rank_avg": 0.5014691352844238, "step": 265, "valid_targets_mean": 2241.7, "valid_targets_min": 1208 }, { "epoch": 0.432, "grad_norm": 0.6125432327307639, "learning_rate": 2.869333333333334e-05, "loss": 0.4721, "loss_nan_ranks": 0, "loss_rank_avg": 0.4296124577522278, "step": 270, "valid_targets_mean": 2600.2, "valid_targets_min": 608 }, { "epoch": 0.44, "grad_norm": 0.8330366261881735, "learning_rate": 2.922666666666667e-05, "loss": 0.4906, "loss_nan_ranks": 0, "loss_rank_avg": 0.5452744960784912, "step": 275, "valid_targets_mean": 2064.5, "valid_targets_min": 641 }, { "epoch": 0.448, "grad_norm": 0.7334594138189166, "learning_rate": 2.9760000000000003e-05, "loss": 0.4781, "loss_nan_ranks": 0, "loss_rank_avg": 0.4792283773422241, "step": 280, "valid_targets_mean": 2248.9, "valid_targets_min": 770 }, { "epoch": 0.456, "grad_norm": 0.7048066084402315, "learning_rate": 3.0293333333333334e-05, "loss": 0.5071, "loss_nan_ranks": 0, "loss_rank_avg": 0.4940047264099121, "step": 285, "valid_targets_mean": 2448.4, "valid_targets_min": 718 }, { "epoch": 0.464, "grad_norm": 0.7321813936089969, "learning_rate": 3.0826666666666674e-05, "loss": 0.4861, "loss_nan_ranks": 0, "loss_rank_avg": 0.4731312096118927, "step": 290, "valid_targets_mean": 2213.3, "valid_targets_min": 614 }, { "epoch": 0.472, "grad_norm": 0.7327861766468036, "learning_rate": 3.1360000000000005e-05, "loss": 0.4978, "loss_nan_ranks": 0, "loss_rank_avg": 0.4812717139720917, "step": 295, "valid_targets_mean": 2238.8, "valid_targets_min": 612 }, { "epoch": 0.48, "grad_norm": 0.6978085848971363, "learning_rate": 3.1893333333333335e-05, "loss": 0.4803, "loss_nan_ranks": 0, "loss_rank_avg": 0.46808069944381714, "step": 300, "valid_targets_mean": 2457.3, "valid_targets_min": 442 }, { "epoch": 0.488, "grad_norm": 0.8111913410877619, "learning_rate": 3.2426666666666666e-05, "loss": 0.479, "loss_nan_ranks": 0, "loss_rank_avg": 0.44130653142929077, "step": 305, "valid_targets_mean": 2637.6, "valid_targets_min": 1587 }, { "epoch": 0.496, "grad_norm": 0.7458942349832357, "learning_rate": 3.296e-05, "loss": 0.4808, "loss_nan_ranks": 0, "loss_rank_avg": 0.49815142154693604, "step": 310, "valid_targets_mean": 2545.6, "valid_targets_min": 869 }, { "epoch": 0.504, "grad_norm": 0.7385285311519089, "learning_rate": 3.349333333333334e-05, "loss": 0.4661, "loss_nan_ranks": 0, "loss_rank_avg": 0.5133688449859619, "step": 315, "valid_targets_mean": 2280.2, "valid_targets_min": 386 }, { "epoch": 0.512, "grad_norm": 0.7586069534178319, "learning_rate": 3.402666666666667e-05, "loss": 0.4886, "loss_nan_ranks": 0, "loss_rank_avg": 0.4860743284225464, "step": 320, "valid_targets_mean": 2133.5, "valid_targets_min": 712 }, { "epoch": 0.52, "grad_norm": 0.6422577702781003, "learning_rate": 3.456e-05, "loss": 0.4812, "loss_nan_ranks": 0, "loss_rank_avg": 0.4424203634262085, "step": 325, "valid_targets_mean": 2430.3, "valid_targets_min": 978 }, { "epoch": 0.528, "grad_norm": 0.6745500124718121, "learning_rate": 3.509333333333333e-05, "loss": 0.4766, "loss_nan_ranks": 0, "loss_rank_avg": 0.49251464009284973, "step": 330, "valid_targets_mean": 2522.8, "valid_targets_min": 820 }, { "epoch": 0.536, "grad_norm": 0.6458982073475765, "learning_rate": 3.562666666666667e-05, "loss": 0.475, "loss_nan_ranks": 0, "loss_rank_avg": 0.4584057331085205, "step": 335, "valid_targets_mean": 2477.6, "valid_targets_min": 969 }, { "epoch": 0.544, "grad_norm": 0.6962215050016953, "learning_rate": 3.6160000000000006e-05, "loss": 0.4734, "loss_nan_ranks": 0, "loss_rank_avg": 0.4557253122329712, "step": 340, "valid_targets_mean": 2211.9, "valid_targets_min": 659 }, { "epoch": 0.552, "grad_norm": 0.6214790557238985, "learning_rate": 3.669333333333334e-05, "loss": 0.4598, "loss_nan_ranks": 0, "loss_rank_avg": 0.439766526222229, "step": 345, "valid_targets_mean": 2683.5, "valid_targets_min": 1118 }, { "epoch": 0.56, "grad_norm": 0.6918806504772981, "learning_rate": 3.722666666666667e-05, "loss": 0.4765, "loss_nan_ranks": 0, "loss_rank_avg": 0.48400256037712097, "step": 350, "valid_targets_mean": 2405.7, "valid_targets_min": 1346 }, { "epoch": 0.568, "grad_norm": 0.700219210449274, "learning_rate": 3.7760000000000004e-05, "loss": 0.4668, "loss_nan_ranks": 0, "loss_rank_avg": 0.48044857382774353, "step": 355, "valid_targets_mean": 2154.9, "valid_targets_min": 616 }, { "epoch": 0.576, "grad_norm": 0.7550351146312819, "learning_rate": 3.8293333333333335e-05, "loss": 0.4845, "loss_nan_ranks": 0, "loss_rank_avg": 0.4983769655227661, "step": 360, "valid_targets_mean": 1993.4, "valid_targets_min": 670 }, { "epoch": 0.584, "grad_norm": 0.8289049493344646, "learning_rate": 3.882666666666667e-05, "loss": 0.4842, "loss_nan_ranks": 0, "loss_rank_avg": 0.5466411113739014, "step": 365, "valid_targets_mean": 2076.8, "valid_targets_min": 698 }, { "epoch": 0.592, "grad_norm": 0.6624910703185226, "learning_rate": 3.936e-05, "loss": 0.5011, "loss_nan_ranks": 0, "loss_rank_avg": 0.482534259557724, "step": 370, "valid_targets_mean": 2544.8, "valid_targets_min": 1580 }, { "epoch": 0.6, "grad_norm": 0.782517624818849, "learning_rate": 3.989333333333333e-05, "loss": 0.4851, "loss_nan_ranks": 0, "loss_rank_avg": 0.47620609402656555, "step": 375, "valid_targets_mean": 1883.6, "valid_targets_min": 685 }, { "epoch": 0.608, "grad_norm": 0.6417723288508768, "learning_rate": 3.9999861365387784e-05, "loss": 0.4697, "loss_nan_ranks": 0, "loss_rank_avg": 0.4307625889778137, "step": 380, "valid_targets_mean": 2684.2, "valid_targets_min": 974 }, { "epoch": 0.616, "grad_norm": 0.6498871680670533, "learning_rate": 3.9999298165569614e-05, "loss": 0.4618, "loss_nan_ranks": 0, "loss_rank_avg": 0.4357556402683258, "step": 385, "valid_targets_mean": 2512.1, "valid_targets_min": 879 }, { "epoch": 0.624, "grad_norm": 0.699782882602203, "learning_rate": 3.999830174807269e-05, "loss": 0.4722, "loss_nan_ranks": 0, "loss_rank_avg": 0.47853145003318787, "step": 390, "valid_targets_mean": 2307.6, "valid_targets_min": 846 }, { "epoch": 0.632, "grad_norm": 0.6566001605088011, "learning_rate": 3.9996872134481036e-05, "loss": 0.4579, "loss_nan_ranks": 0, "loss_rank_avg": 0.472840815782547, "step": 395, "valid_targets_mean": 2387.8, "valid_targets_min": 764 }, { "epoch": 0.64, "grad_norm": 0.8011644192046202, "learning_rate": 3.999500935576245e-05, "loss": 0.4831, "loss_nan_ranks": 0, "loss_rank_avg": 0.47220632433891296, "step": 400, "valid_targets_mean": 2383.8, "valid_targets_min": 886 }, { "epoch": 0.648, "grad_norm": 0.7219930471596472, "learning_rate": 3.999271345226776e-05, "loss": 0.4846, "loss_nan_ranks": 0, "loss_rank_avg": 0.510077178478241, "step": 405, "valid_targets_mean": 2494.2, "valid_targets_min": 951 }, { "epoch": 0.656, "grad_norm": 0.6472532682023311, "learning_rate": 3.9989984473730035e-05, "loss": 0.4598, "loss_nan_ranks": 0, "loss_rank_avg": 0.4869913160800934, "step": 410, "valid_targets_mean": 2634.6, "valid_targets_min": 826 }, { "epoch": 0.664, "grad_norm": 0.626388386803493, "learning_rate": 3.998682247926343e-05, "loss": 0.458, "loss_nan_ranks": 0, "loss_rank_avg": 0.45631903409957886, "step": 415, "valid_targets_mean": 2277.9, "valid_targets_min": 396 }, { "epoch": 0.672, "grad_norm": 0.6114116058267862, "learning_rate": 3.998322753736193e-05, "loss": 0.4832, "loss_nan_ranks": 0, "loss_rank_avg": 0.47891151905059814, "step": 420, "valid_targets_mean": 2695.1, "valid_targets_min": 760 }, { "epoch": 0.68, "grad_norm": 0.6591550740243267, "learning_rate": 3.99791997258979e-05, "loss": 0.4783, "loss_nan_ranks": 0, "loss_rank_avg": 0.45776450634002686, "step": 425, "valid_targets_mean": 2526.2, "valid_targets_min": 900 }, { "epoch": 0.688, "grad_norm": 0.6663403401337069, "learning_rate": 3.997473913212036e-05, "loss": 0.4678, "loss_nan_ranks": 0, "loss_rank_avg": 0.48343199491500854, "step": 430, "valid_targets_mean": 2378.8, "valid_targets_min": 795 }, { "epoch": 0.696, "grad_norm": 0.6670143387646009, "learning_rate": 3.9969845852653087e-05, "loss": 0.478, "loss_nan_ranks": 0, "loss_rank_avg": 0.45757466554641724, "step": 435, "valid_targets_mean": 2522.7, "valid_targets_min": 1505 }, { "epoch": 0.704, "grad_norm": 0.58199200233584, "learning_rate": 3.996451999349258e-05, "loss": 0.4386, "loss_nan_ranks": 0, "loss_rank_avg": 0.42813169956207275, "step": 440, "valid_targets_mean": 2870.2, "valid_targets_min": 1437 }, { "epoch": 0.712, "grad_norm": 0.6752845213387725, "learning_rate": 3.995876167000569e-05, "loss": 0.4821, "loss_nan_ranks": 0, "loss_rank_avg": 0.47400549054145813, "step": 445, "valid_targets_mean": 2750.9, "valid_targets_min": 944 }, { "epoch": 0.72, "grad_norm": 0.667561633288874, "learning_rate": 3.9952571006927186e-05, "loss": 0.4551, "loss_nan_ranks": 0, "loss_rank_avg": 0.48587462306022644, "step": 450, "valid_targets_mean": 2546.8, "valid_targets_min": 719 }, { "epoch": 0.728, "grad_norm": 0.6724373683462508, "learning_rate": 3.9945948138356995e-05, "loss": 0.4661, "loss_nan_ranks": 0, "loss_rank_avg": 0.4643342196941376, "step": 455, "valid_targets_mean": 2515.9, "valid_targets_min": 1330 }, { "epoch": 0.736, "grad_norm": 0.7049819458106401, "learning_rate": 3.993889320775735e-05, "loss": 0.4656, "loss_nan_ranks": 0, "loss_rank_avg": 0.4860385060310364, "step": 460, "valid_targets_mean": 2231.8, "valid_targets_min": 576 }, { "epoch": 0.744, "grad_norm": 0.6400462079946704, "learning_rate": 3.9931406367949627e-05, "loss": 0.4794, "loss_nan_ranks": 0, "loss_rank_avg": 0.4631817042827606, "step": 465, "valid_targets_mean": 2380.6, "valid_targets_min": 667 }, { "epoch": 0.752, "grad_norm": 0.8286441176655512, "learning_rate": 3.9923487781111106e-05, "loss": 0.4646, "loss_nan_ranks": 0, "loss_rank_avg": 0.4641321003437042, "step": 470, "valid_targets_mean": 2411.0, "valid_targets_min": 749 }, { "epoch": 0.76, "grad_norm": 0.7068889030236437, "learning_rate": 3.9915137618771386e-05, "loss": 0.4751, "loss_nan_ranks": 0, "loss_rank_avg": 0.46361860632896423, "step": 475, "valid_targets_mean": 2641.8, "valid_targets_min": 751 }, { "epoch": 0.768, "grad_norm": 0.6181802936183315, "learning_rate": 3.9906356061808713e-05, "loss": 0.4556, "loss_nan_ranks": 0, "loss_rank_avg": 0.47535887360572815, "step": 480, "valid_targets_mean": 2579.4, "valid_targets_min": 1069 }, { "epoch": 0.776, "grad_norm": 0.754698609738125, "learning_rate": 3.9897143300446055e-05, "loss": 0.4762, "loss_nan_ranks": 0, "loss_rank_avg": 0.49311938881874084, "step": 485, "valid_targets_mean": 2111.9, "valid_targets_min": 417 }, { "epoch": 0.784, "grad_norm": 0.679634462005907, "learning_rate": 3.988749953424696e-05, "loss": 0.4786, "loss_nan_ranks": 0, "loss_rank_avg": 0.46043074131011963, "step": 490, "valid_targets_mean": 2328.2, "valid_targets_min": 615 }, { "epoch": 0.792, "grad_norm": 0.815808151327097, "learning_rate": 3.9877424972111264e-05, "loss": 0.4692, "loss_nan_ranks": 0, "loss_rank_avg": 0.5270492434501648, "step": 495, "valid_targets_mean": 2088.6, "valid_targets_min": 798 }, { "epoch": 0.8, "grad_norm": 0.5897123286869944, "learning_rate": 3.9866919832270554e-05, "loss": 0.4608, "loss_nan_ranks": 0, "loss_rank_avg": 0.4187796711921692, "step": 500, "valid_targets_mean": 2436.2, "valid_targets_min": 1019 }, { "epoch": 0.808, "grad_norm": 0.7432501071695375, "learning_rate": 3.9855984342283414e-05, "loss": 0.4667, "loss_nan_ranks": 0, "loss_rank_avg": 0.4671873450279236, "step": 505, "valid_targets_mean": 1886.4, "valid_targets_min": 681 }, { "epoch": 0.816, "grad_norm": 0.6216648090898564, "learning_rate": 3.9844618739030545e-05, "loss": 0.4729, "loss_nan_ranks": 0, "loss_rank_avg": 0.4360358715057373, "step": 510, "valid_targets_mean": 2900.6, "valid_targets_min": 684 }, { "epoch": 0.824, "grad_norm": 0.6939020046248706, "learning_rate": 3.98328232687096e-05, "loss": 0.4739, "loss_nan_ranks": 0, "loss_rank_avg": 0.49482274055480957, "step": 515, "valid_targets_mean": 2357.6, "valid_targets_min": 590 }, { "epoch": 0.832, "grad_norm": 0.721859248226527, "learning_rate": 3.982059818682986e-05, "loss": 0.5004, "loss_nan_ranks": 0, "loss_rank_avg": 0.5051529407501221, "step": 520, "valid_targets_mean": 2156.8, "valid_targets_min": 687 }, { "epoch": 0.84, "grad_norm": 0.6494270332660046, "learning_rate": 3.980794375820669e-05, "loss": 0.4499, "loss_nan_ranks": 0, "loss_rank_avg": 0.4555926024913788, "step": 525, "valid_targets_mean": 2189.4, "valid_targets_min": 674 }, { "epoch": 0.848, "grad_norm": 0.7317549355927052, "learning_rate": 3.9794860256955825e-05, "loss": 0.4758, "loss_nan_ranks": 0, "loss_rank_avg": 0.48144295811653137, "step": 530, "valid_targets_mean": 1934.8, "valid_targets_min": 777 }, { "epoch": 0.856, "grad_norm": 0.6819599897787019, "learning_rate": 3.9781347966487415e-05, "loss": 0.4861, "loss_nan_ranks": 0, "loss_rank_avg": 0.49034303426742554, "step": 535, "valid_targets_mean": 2477.9, "valid_targets_min": 615 }, { "epoch": 0.864, "grad_norm": 0.6893642280639611, "learning_rate": 3.9767407179499875e-05, "loss": 0.4751, "loss_nan_ranks": 0, "loss_rank_avg": 0.481620728969574, "step": 540, "valid_targets_mean": 2269.1, "valid_targets_min": 496 }, { "epoch": 0.872, "grad_norm": 0.731052766030611, "learning_rate": 3.975303819797358e-05, "loss": 0.4605, "loss_nan_ranks": 0, "loss_rank_avg": 0.4804762601852417, "step": 545, "valid_targets_mean": 2014.6, "valid_targets_min": 825 }, { "epoch": 0.88, "grad_norm": 0.6245922571157282, "learning_rate": 3.973824133316431e-05, "loss": 0.4514, "loss_nan_ranks": 0, "loss_rank_avg": 0.4820593297481537, "step": 550, "valid_targets_mean": 2448.4, "valid_targets_min": 895 }, { "epoch": 0.888, "grad_norm": 0.6626544949374586, "learning_rate": 3.972301690559645e-05, "loss": 0.4512, "loss_nan_ranks": 0, "loss_rank_avg": 0.4780968129634857, "step": 555, "valid_targets_mean": 2211.1, "valid_targets_min": 1262 }, { "epoch": 0.896, "grad_norm": 0.6705544140440111, "learning_rate": 3.970736524505615e-05, "loss": 0.4664, "loss_nan_ranks": 0, "loss_rank_avg": 0.4690876603126526, "step": 560, "valid_targets_mean": 2282.8, "valid_targets_min": 729 }, { "epoch": 0.904, "grad_norm": 0.6584722171298177, "learning_rate": 3.969128669058411e-05, "loss": 0.4626, "loss_nan_ranks": 0, "loss_rank_avg": 0.4553741216659546, "step": 565, "valid_targets_mean": 2019.5, "valid_targets_min": 484 }, { "epoch": 0.912, "grad_norm": 0.6229943668673381, "learning_rate": 3.9674781590468256e-05, "loss": 0.4442, "loss_nan_ranks": 0, "loss_rank_avg": 0.45545560121536255, "step": 570, "valid_targets_mean": 2519.6, "valid_targets_min": 841 }, { "epoch": 0.92, "grad_norm": 0.6751176168853488, "learning_rate": 3.9657850302236184e-05, "loss": 0.4485, "loss_nan_ranks": 0, "loss_rank_avg": 0.47964492440223694, "step": 575, "valid_targets_mean": 2350.7, "valid_targets_min": 1221 }, { "epoch": 0.928, "grad_norm": 0.6014793412024203, "learning_rate": 3.964049319264744e-05, "loss": 0.4446, "loss_nan_ranks": 0, "loss_rank_avg": 0.4613630473613739, "step": 580, "valid_targets_mean": 2549.6, "valid_targets_min": 812 }, { "epoch": 0.936, "grad_norm": 0.5807355939238428, "learning_rate": 3.962271063768555e-05, "loss": 0.4569, "loss_nan_ranks": 0, "loss_rank_avg": 0.4311615824699402, "step": 585, "valid_targets_mean": 2432.5, "valid_targets_min": 851 }, { "epoch": 0.944, "grad_norm": 0.6178326812634055, "learning_rate": 3.960450302254989e-05, "loss": 0.4474, "loss_nan_ranks": 0, "loss_rank_avg": 0.4509955048561096, "step": 590, "valid_targets_mean": 2490.1, "valid_targets_min": 746 }, { "epoch": 0.952, "grad_norm": 0.6233350959016624, "learning_rate": 3.958587074164735e-05, "loss": 0.4447, "loss_nan_ranks": 0, "loss_rank_avg": 0.45208895206451416, "step": 595, "valid_targets_mean": 2428.0, "valid_targets_min": 998 }, { "epoch": 0.96, "grad_norm": 0.7009493324631896, "learning_rate": 3.956681419858376e-05, "loss": 0.4571, "loss_nan_ranks": 0, "loss_rank_avg": 0.4502309560775757, "step": 600, "valid_targets_mean": 2555.1, "valid_targets_min": 987 }, { "epoch": 0.968, "grad_norm": 0.5923922989881036, "learning_rate": 3.954733380615516e-05, "loss": 0.4396, "loss_nan_ranks": 0, "loss_rank_avg": 0.4207536578178406, "step": 605, "valid_targets_mean": 2795.0, "valid_targets_min": 1191 }, { "epoch": 0.976, "grad_norm": 0.5779207349870441, "learning_rate": 3.95274299863389e-05, "loss": 0.4469, "loss_nan_ranks": 0, "loss_rank_avg": 0.4246981739997864, "step": 610, "valid_targets_mean": 2679.1, "valid_targets_min": 1744 }, { "epoch": 0.984, "grad_norm": 0.6198976743598128, "learning_rate": 3.950710317028443e-05, "loss": 0.4532, "loss_nan_ranks": 0, "loss_rank_avg": 0.4403854012489319, "step": 615, "valid_targets_mean": 2471.3, "valid_targets_min": 1215 }, { "epoch": 0.992, "grad_norm": 0.6121629508015668, "learning_rate": 3.9486353798303996e-05, "loss": 0.443, "loss_nan_ranks": 0, "loss_rank_avg": 0.4395740032196045, "step": 620, "valid_targets_mean": 2559.3, "valid_targets_min": 676 }, { "epoch": 1.0, "grad_norm": 0.799149286424459, "learning_rate": 3.946518231986313e-05, "loss": 0.4748, "loss_nan_ranks": 0, "loss_rank_avg": 0.49580055475234985, "step": 625, "valid_targets_mean": 2016.3, "valid_targets_min": 653 }, { "epoch": 1.008, "grad_norm": 0.5330351493981808, "learning_rate": 3.9443589193570847e-05, "loss": 0.4202, "loss_nan_ranks": 0, "loss_rank_avg": 0.39457374811172485, "step": 630, "valid_targets_mean": 2838.1, "valid_targets_min": 606 }, { "epoch": 1.016, "grad_norm": 0.6549967985881883, "learning_rate": 3.942157488716976e-05, "loss": 0.4273, "loss_nan_ranks": 0, "loss_rank_avg": 0.4309835433959961, "step": 635, "valid_targets_mean": 2344.7, "valid_targets_min": 841 }, { "epoch": 1.024, "grad_norm": 0.7153660835249178, "learning_rate": 3.939913987752595e-05, "loss": 0.4383, "loss_nan_ranks": 0, "loss_rank_avg": 0.471935510635376, "step": 640, "valid_targets_mean": 2266.1, "valid_targets_min": 741 }, { "epoch": 1.032, "grad_norm": 0.8338207180913084, "learning_rate": 3.9376284650618605e-05, "loss": 0.4401, "loss_nan_ranks": 0, "loss_rank_avg": 0.4705868065357208, "step": 645, "valid_targets_mean": 2167.0, "valid_targets_min": 601 }, { "epoch": 1.04, "grad_norm": 0.7086511513875436, "learning_rate": 3.935300970152952e-05, "loss": 0.4555, "loss_nan_ranks": 0, "loss_rank_avg": 0.4832397699356079, "step": 650, "valid_targets_mean": 2114.9, "valid_targets_min": 484 }, { "epoch": 1.048, "grad_norm": 0.6225217966467873, "learning_rate": 3.932931553443235e-05, "loss": 0.4612, "loss_nan_ranks": 0, "loss_rank_avg": 0.4231920838356018, "step": 655, "valid_targets_mean": 2642.4, "valid_targets_min": 1311 }, { "epoch": 1.056, "grad_norm": 0.5825267016910932, "learning_rate": 3.930520266258173e-05, "loss": 0.4561, "loss_nan_ranks": 0, "loss_rank_avg": 0.4089941084384918, "step": 660, "valid_targets_mean": 2996.6, "valid_targets_min": 620 }, { "epoch": 1.064, "grad_norm": 0.7200762284984045, "learning_rate": 3.928067160830208e-05, "loss": 0.4515, "loss_nan_ranks": 0, "loss_rank_avg": 0.46363526582717896, "step": 665, "valid_targets_mean": 2121.4, "valid_targets_min": 530 }, { "epoch": 1.072, "grad_norm": 0.7015136785171452, "learning_rate": 3.925572290297638e-05, "loss": 0.4273, "loss_nan_ranks": 0, "loss_rank_avg": 0.4679427146911621, "step": 670, "valid_targets_mean": 2227.5, "valid_targets_min": 683 }, { "epoch": 1.08, "grad_norm": 0.6123409957265331, "learning_rate": 3.9230357087034606e-05, "loss": 0.4504, "loss_nan_ranks": 0, "loss_rank_avg": 0.42846158146858215, "step": 675, "valid_targets_mean": 2303.3, "valid_targets_min": 370 }, { "epoch": 1.088, "grad_norm": 0.6261474208255726, "learning_rate": 3.9204574709942036e-05, "loss": 0.4302, "loss_nan_ranks": 0, "loss_rank_avg": 0.462083637714386, "step": 680, "valid_targets_mean": 2561.6, "valid_targets_min": 1184 }, { "epoch": 1.096, "grad_norm": 0.6088483216638148, "learning_rate": 3.917837633018734e-05, "loss": 0.4435, "loss_nan_ranks": 0, "loss_rank_avg": 0.4106854200363159, "step": 685, "valid_targets_mean": 2380.8, "valid_targets_min": 548 }, { "epoch": 1.104, "grad_norm": 0.6534853519400231, "learning_rate": 3.915176251527051e-05, "loss": 0.4238, "loss_nan_ranks": 0, "loss_rank_avg": 0.4356333017349243, "step": 690, "valid_targets_mean": 2245.1, "valid_targets_min": 1293 }, { "epoch": 1.112, "grad_norm": 0.7245451888948117, "learning_rate": 3.912473384169051e-05, "loss": 0.4383, "loss_nan_ranks": 0, "loss_rank_avg": 0.4790792465209961, "step": 695, "valid_targets_mean": 1835.5, "valid_targets_min": 306 }, { "epoch": 1.12, "grad_norm": 0.7305794087802582, "learning_rate": 3.9097290894932866e-05, "loss": 0.4387, "loss_nan_ranks": 0, "loss_rank_avg": 0.44803181290626526, "step": 700, "valid_targets_mean": 1999.6, "valid_targets_min": 749 }, { "epoch": 1.1280000000000001, "grad_norm": 0.7068826644187056, "learning_rate": 3.906943426945691e-05, "loss": 0.4305, "loss_nan_ranks": 0, "loss_rank_avg": 0.4128977060317993, "step": 705, "valid_targets_mean": 2458.2, "valid_targets_min": 814 }, { "epoch": 1.1360000000000001, "grad_norm": 0.6339840636291904, "learning_rate": 3.9041164568682955e-05, "loss": 0.4257, "loss_nan_ranks": 0, "loss_rank_avg": 0.4198548495769501, "step": 710, "valid_targets_mean": 2165.4, "valid_targets_min": 682 }, { "epoch": 1.144, "grad_norm": 0.5528313866253091, "learning_rate": 3.90124824049792e-05, "loss": 0.4455, "loss_nan_ranks": 0, "loss_rank_avg": 0.407492071390152, "step": 715, "valid_targets_mean": 2830.7, "valid_targets_min": 924 }, { "epoch": 1.152, "grad_norm": 0.6483857714237081, "learning_rate": 3.8983388399648465e-05, "loss": 0.4181, "loss_nan_ranks": 0, "loss_rank_avg": 0.42082953453063965, "step": 720, "valid_targets_mean": 2129.1, "valid_targets_min": 685 }, { "epoch": 1.16, "grad_norm": 0.7694201103576355, "learning_rate": 3.895388318291474e-05, "loss": 0.4276, "loss_nan_ranks": 0, "loss_rank_avg": 0.4231267273426056, "step": 725, "valid_targets_mean": 2458.7, "valid_targets_min": 670 }, { "epoch": 1.168, "grad_norm": 0.5506654953283203, "learning_rate": 3.892396739390952e-05, "loss": 0.4124, "loss_nan_ranks": 0, "loss_rank_avg": 0.42060285806655884, "step": 730, "valid_targets_mean": 2666.7, "valid_targets_min": 916 }, { "epoch": 1.176, "grad_norm": 0.6939198546037962, "learning_rate": 3.8893641680657986e-05, "loss": 0.4463, "loss_nan_ranks": 0, "loss_rank_avg": 0.40406715869903564, "step": 735, "valid_targets_mean": 2180.9, "valid_targets_min": 576 }, { "epoch": 1.184, "grad_norm": 0.638939912139877, "learning_rate": 3.886290670006495e-05, "loss": 0.4307, "loss_nan_ranks": 0, "loss_rank_avg": 0.42432937026023865, "step": 740, "valid_targets_mean": 2253.4, "valid_targets_min": 788 }, { "epoch": 1.192, "grad_norm": 0.6065560979177493, "learning_rate": 3.8831763117900605e-05, "loss": 0.4421, "loss_nan_ranks": 0, "loss_rank_avg": 0.42419037222862244, "step": 745, "valid_targets_mean": 2507.4, "valid_targets_min": 729 }, { "epoch": 1.2, "grad_norm": 0.6276197118956589, "learning_rate": 3.8800211608786166e-05, "loss": 0.4354, "loss_nan_ranks": 0, "loss_rank_avg": 0.44459959864616394, "step": 750, "valid_targets_mean": 2263.6, "valid_targets_min": 607 }, { "epoch": 1.208, "grad_norm": 0.5928256043581343, "learning_rate": 3.876825285617918e-05, "loss": 0.4252, "loss_nan_ranks": 0, "loss_rank_avg": 0.3992008864879608, "step": 755, "valid_targets_mean": 2632.4, "valid_targets_min": 837 }, { "epoch": 1.216, "grad_norm": 0.6932083981151625, "learning_rate": 3.873588755235876e-05, "loss": 0.445, "loss_nan_ranks": 0, "loss_rank_avg": 0.4538978934288025, "step": 760, "valid_targets_mean": 2446.4, "valid_targets_min": 919 }, { "epoch": 1.224, "grad_norm": 0.6600533915715574, "learning_rate": 3.870311639841062e-05, "loss": 0.4379, "loss_nan_ranks": 0, "loss_rank_avg": 0.46171343326568604, "step": 765, "valid_targets_mean": 2320.2, "valid_targets_min": 938 }, { "epoch": 1.232, "grad_norm": 0.6570908053641036, "learning_rate": 3.866994010421182e-05, "loss": 0.4341, "loss_nan_ranks": 0, "loss_rank_avg": 0.4576682448387146, "step": 770, "valid_targets_mean": 2393.4, "valid_targets_min": 1134 }, { "epoch": 1.24, "grad_norm": 0.6076898436586241, "learning_rate": 3.863635938841545e-05, "loss": 0.4333, "loss_nan_ranks": 0, "loss_rank_avg": 0.42452096939086914, "step": 775, "valid_targets_mean": 2579.1, "valid_targets_min": 1369 }, { "epoch": 1.248, "grad_norm": 0.6534130436400201, "learning_rate": 3.8602374978435015e-05, "loss": 0.4144, "loss_nan_ranks": 0, "loss_rank_avg": 0.4091978669166565, "step": 780, "valid_targets_mean": 2470.7, "valid_targets_min": 662 }, { "epoch": 1.256, "grad_norm": 0.601406785106393, "learning_rate": 3.8567987610428705e-05, "loss": 0.4455, "loss_nan_ranks": 0, "loss_rank_avg": 0.41432830691337585, "step": 785, "valid_targets_mean": 2529.9, "valid_targets_min": 997 }, { "epoch": 1.264, "grad_norm": 0.6533424854411587, "learning_rate": 3.853319802928345e-05, "loss": 0.4466, "loss_nan_ranks": 0, "loss_rank_avg": 0.4692718982696533, "step": 790, "valid_targets_mean": 2336.9, "valid_targets_min": 973 }, { "epoch": 1.272, "grad_norm": 0.682487385408, "learning_rate": 3.849800698859877e-05, "loss": 0.4366, "loss_nan_ranks": 0, "loss_rank_avg": 0.45809996128082275, "step": 795, "valid_targets_mean": 2192.1, "valid_targets_min": 737 }, { "epoch": 1.28, "grad_norm": 0.5916792104874027, "learning_rate": 3.846241525067047e-05, "loss": 0.4523, "loss_nan_ranks": 0, "loss_rank_avg": 0.44107410311698914, "step": 800, "valid_targets_mean": 2847.6, "valid_targets_min": 1315 }, { "epoch": 1.288, "grad_norm": 0.5829522122783333, "learning_rate": 3.842642358647411e-05, "loss": 0.4391, "loss_nan_ranks": 0, "loss_rank_avg": 0.4088653028011322, "step": 805, "valid_targets_mean": 2645.9, "valid_targets_min": 875 }, { "epoch": 1.296, "grad_norm": 0.5499814298234279, "learning_rate": 3.839003277564831e-05, "loss": 0.418, "loss_nan_ranks": 0, "loss_rank_avg": 0.4087795615196228, "step": 810, "valid_targets_mean": 2580.8, "valid_targets_min": 1486 }, { "epoch": 1.304, "grad_norm": 0.6574479760890423, "learning_rate": 3.835324360647785e-05, "loss": 0.4347, "loss_nan_ranks": 0, "loss_rank_avg": 0.4606819748878479, "step": 815, "valid_targets_mean": 2330.2, "valid_targets_min": 817 }, { "epoch": 1.312, "grad_norm": 0.601654460513071, "learning_rate": 3.831605687587663e-05, "loss": 0.4312, "loss_nan_ranks": 0, "loss_rank_avg": 0.4386676251888275, "step": 820, "valid_targets_mean": 2486.2, "valid_targets_min": 923 }, { "epoch": 1.32, "grad_norm": 0.6595379480500122, "learning_rate": 3.827847338937037e-05, "loss": 0.4383, "loss_nan_ranks": 0, "loss_rank_avg": 0.47224780917167664, "step": 825, "valid_targets_mean": 2481.6, "valid_targets_min": 1308 }, { "epoch": 1.328, "grad_norm": 0.6420505342297114, "learning_rate": 3.824049396107918e-05, "loss": 0.4555, "loss_nan_ranks": 0, "loss_rank_avg": 0.4457557797431946, "step": 830, "valid_targets_mean": 2432.2, "valid_targets_min": 1208 }, { "epoch": 1.336, "grad_norm": 0.6171185957266434, "learning_rate": 3.8202119413699914e-05, "loss": 0.4501, "loss_nan_ranks": 0, "loss_rank_avg": 0.4661561846733093, "step": 835, "valid_targets_mean": 2544.8, "valid_targets_min": 1655 }, { "epoch": 1.3439999999999999, "grad_norm": 0.7465821312386511, "learning_rate": 3.8163350578488366e-05, "loss": 0.4313, "loss_nan_ranks": 0, "loss_rank_avg": 0.4433724284172058, "step": 840, "valid_targets_mean": 2051.7, "valid_targets_min": 692 }, { "epoch": 1.3519999999999999, "grad_norm": 0.6169111289469572, "learning_rate": 3.812418829524124e-05, "loss": 0.4294, "loss_nan_ranks": 0, "loss_rank_avg": 0.412350058555603, "step": 845, "valid_targets_mean": 2435.8, "valid_targets_min": 589 }, { "epoch": 1.3599999999999999, "grad_norm": 0.7532082786283157, "learning_rate": 3.8084633412277974e-05, "loss": 0.463, "loss_nan_ranks": 0, "loss_rank_avg": 0.4547457695007324, "step": 850, "valid_targets_mean": 1884.3, "valid_targets_min": 713 }, { "epoch": 1.3679999999999999, "grad_norm": 0.6398375548597498, "learning_rate": 3.804468678642238e-05, "loss": 0.4541, "loss_nan_ranks": 0, "loss_rank_avg": 0.42184194922447205, "step": 855, "valid_targets_mean": 2393.6, "valid_targets_min": 615 }, { "epoch": 1.376, "grad_norm": 0.631628642707112, "learning_rate": 3.800434928298403e-05, "loss": 0.4376, "loss_nan_ranks": 0, "loss_rank_avg": 0.41081473231315613, "step": 860, "valid_targets_mean": 2298.9, "valid_targets_min": 662 }, { "epoch": 1.384, "grad_norm": 0.6933767743328803, "learning_rate": 3.796362177573957e-05, "loss": 0.4417, "loss_nan_ranks": 0, "loss_rank_avg": 0.4457859694957733, "step": 865, "valid_targets_mean": 2026.8, "valid_targets_min": 720 }, { "epoch": 1.392, "grad_norm": 1.369681479343786, "learning_rate": 3.792250514691378e-05, "loss": 0.438, "loss_nan_ranks": 0, "loss_rank_avg": 0.3672015368938446, "step": 870, "valid_targets_mean": 3045.9, "valid_targets_min": 1121 }, { "epoch": 1.4, "grad_norm": 0.6162913143906541, "learning_rate": 3.788100028716043e-05, "loss": 0.4476, "loss_nan_ranks": 0, "loss_rank_avg": 0.4049740731716156, "step": 875, "valid_targets_mean": 2585.2, "valid_targets_min": 676 }, { "epoch": 1.408, "grad_norm": 0.6079961919327072, "learning_rate": 3.7839108095543016e-05, "loss": 0.4309, "loss_nan_ranks": 0, "loss_rank_avg": 0.405215322971344, "step": 880, "valid_targets_mean": 2377.9, "valid_targets_min": 889 }, { "epoch": 1.416, "grad_norm": 0.6785807936076146, "learning_rate": 3.7796829479515295e-05, "loss": 0.4266, "loss_nan_ranks": 0, "loss_rank_avg": 0.4144185185432434, "step": 885, "valid_targets_mean": 2151.0, "valid_targets_min": 642 }, { "epoch": 1.424, "grad_norm": 0.6131872618040544, "learning_rate": 3.775416535490159e-05, "loss": 0.4558, "loss_nan_ranks": 0, "loss_rank_avg": 0.4448813199996948, "step": 890, "valid_targets_mean": 2378.1, "valid_targets_min": 654 }, { "epoch": 1.432, "grad_norm": 0.6613538155685743, "learning_rate": 3.7711116645876984e-05, "loss": 0.4357, "loss_nan_ranks": 0, "loss_rank_avg": 0.4319063127040863, "step": 895, "valid_targets_mean": 2121.9, "valid_targets_min": 585 }, { "epoch": 1.44, "grad_norm": 0.7462290977697131, "learning_rate": 3.7667684284947286e-05, "loss": 0.4436, "loss_nan_ranks": 0, "loss_rank_avg": 0.4267106056213379, "step": 900, "valid_targets_mean": 2381.2, "valid_targets_min": 507 }, { "epoch": 1.448, "grad_norm": 0.6275859403485409, "learning_rate": 3.762386921292885e-05, "loss": 0.454, "loss_nan_ranks": 0, "loss_rank_avg": 0.45304590463638306, "step": 905, "valid_targets_mean": 2642.2, "valid_targets_min": 978 }, { "epoch": 1.456, "grad_norm": 0.5666889148480784, "learning_rate": 3.757967237892818e-05, "loss": 0.4302, "loss_nan_ranks": 0, "loss_rank_avg": 0.38631516695022583, "step": 910, "valid_targets_mean": 2592.1, "valid_targets_min": 590 }, { "epoch": 1.464, "grad_norm": 0.5662953110489375, "learning_rate": 3.7535094740321334e-05, "loss": 0.447, "loss_nan_ranks": 0, "loss_rank_avg": 0.4322415888309479, "step": 915, "valid_targets_mean": 2557.4, "valid_targets_min": 689 }, { "epoch": 1.472, "grad_norm": 0.5882496676672685, "learning_rate": 3.749013726273328e-05, "loss": 0.4283, "loss_nan_ranks": 0, "loss_rank_avg": 0.4422641694545746, "step": 920, "valid_targets_mean": 2509.5, "valid_targets_min": 1141 }, { "epoch": 1.48, "grad_norm": 0.5776506126678617, "learning_rate": 3.7444800920016875e-05, "loss": 0.4186, "loss_nan_ranks": 0, "loss_rank_avg": 0.4118436276912689, "step": 925, "valid_targets_mean": 2366.6, "valid_targets_min": 597 }, { "epoch": 1.488, "grad_norm": 0.6271293382346671, "learning_rate": 3.7399086694231864e-05, "loss": 0.4488, "loss_nan_ranks": 0, "loss_rank_avg": 0.4351826608181, "step": 930, "valid_targets_mean": 2374.8, "valid_targets_min": 1080 }, { "epoch": 1.496, "grad_norm": 0.7241735711030381, "learning_rate": 3.735299557562352e-05, "loss": 0.4349, "loss_nan_ranks": 0, "loss_rank_avg": 0.4319533407688141, "step": 935, "valid_targets_mean": 2160.5, "valid_targets_min": 852 }, { "epoch": 1.504, "grad_norm": 0.7100669252442159, "learning_rate": 3.7306528562601245e-05, "loss": 0.4398, "loss_nan_ranks": 0, "loss_rank_avg": 0.45116013288497925, "step": 940, "valid_targets_mean": 2070.9, "valid_targets_min": 764 }, { "epoch": 1.512, "grad_norm": 0.7200292914345058, "learning_rate": 3.7259686661716945e-05, "loss": 0.4475, "loss_nan_ranks": 0, "loss_rank_avg": 0.49465733766555786, "step": 945, "valid_targets_mean": 2056.4, "valid_targets_min": 753 }, { "epoch": 1.52, "grad_norm": 0.6763700134990425, "learning_rate": 3.7212470887643204e-05, "loss": 0.4372, "loss_nan_ranks": 0, "loss_rank_avg": 0.4461861252784729, "step": 950, "valid_targets_mean": 2200.1, "valid_targets_min": 1060 }, { "epoch": 1.528, "grad_norm": 0.6213853943162089, "learning_rate": 3.7164882263151315e-05, "loss": 0.4394, "loss_nan_ranks": 0, "loss_rank_avg": 0.4571177065372467, "step": 955, "valid_targets_mean": 2428.8, "valid_targets_min": 930 }, { "epoch": 1.536, "grad_norm": 0.6604591254411275, "learning_rate": 3.711692181908913e-05, "loss": 0.4312, "loss_nan_ranks": 0, "loss_rank_avg": 0.44394445419311523, "step": 960, "valid_targets_mean": 2190.9, "valid_targets_min": 756 }, { "epoch": 1.544, "grad_norm": 0.621120610099597, "learning_rate": 3.706859059435871e-05, "loss": 0.4395, "loss_nan_ranks": 0, "loss_rank_avg": 0.4528801441192627, "step": 965, "valid_targets_mean": 2357.9, "valid_targets_min": 714 }, { "epoch": 1.552, "grad_norm": 0.6479086626249172, "learning_rate": 3.701988963589384e-05, "loss": 0.4319, "loss_nan_ranks": 0, "loss_rank_avg": 0.4565218687057495, "step": 970, "valid_targets_mean": 2357.4, "valid_targets_min": 678 }, { "epoch": 1.56, "grad_norm": 0.5929729443171671, "learning_rate": 3.697081999863736e-05, "loss": 0.4426, "loss_nan_ranks": 0, "loss_rank_avg": 0.41704872250556946, "step": 975, "valid_targets_mean": 2477.4, "valid_targets_min": 658 }, { "epoch": 1.568, "grad_norm": 0.7034370523967252, "learning_rate": 3.692138274551828e-05, "loss": 0.4317, "loss_nan_ranks": 0, "loss_rank_avg": 0.4655498266220093, "step": 980, "valid_targets_mean": 2352.0, "valid_targets_min": 1173 }, { "epoch": 1.576, "grad_norm": 0.6817636602005042, "learning_rate": 3.687157894742878e-05, "loss": 0.4379, "loss_nan_ranks": 0, "loss_rank_avg": 0.4376213848590851, "step": 985, "valid_targets_mean": 2075.2, "valid_targets_min": 604 }, { "epoch": 1.584, "grad_norm": 0.6485914336603663, "learning_rate": 3.682140968320101e-05, "loss": 0.4332, "loss_nan_ranks": 0, "loss_rank_avg": 0.4256216585636139, "step": 990, "valid_targets_mean": 2463.4, "valid_targets_min": 1065 }, { "epoch": 1.592, "grad_norm": 0.5841306362302757, "learning_rate": 3.6770876039583725e-05, "loss": 0.4545, "loss_nan_ranks": 0, "loss_rank_avg": 0.4446376860141754, "step": 995, "valid_targets_mean": 2528.8, "valid_targets_min": 761 }, { "epoch": 1.6, "grad_norm": 0.6434175836703774, "learning_rate": 3.671997911121871e-05, "loss": 0.4571, "loss_nan_ranks": 0, "loss_rank_avg": 0.44120827317237854, "step": 1000, "valid_targets_mean": 2088.9, "valid_targets_min": 686 }, { "epoch": 1.608, "grad_norm": 0.6288341619537156, "learning_rate": 3.6668720000617126e-05, "loss": 0.4208, "loss_nan_ranks": 0, "loss_rank_avg": 0.4059444069862366, "step": 1005, "valid_targets_mean": 2276.5, "valid_targets_min": 786 }, { "epoch": 1.616, "grad_norm": 0.5844705403803708, "learning_rate": 3.661709981813558e-05, "loss": 0.4345, "loss_nan_ranks": 0, "loss_rank_avg": 0.4230886399745941, "step": 1010, "valid_targets_mean": 2555.6, "valid_targets_min": 809 }, { "epoch": 1.624, "grad_norm": 0.6244939134987266, "learning_rate": 3.6565119681952086e-05, "loss": 0.4553, "loss_nan_ranks": 0, "loss_rank_avg": 0.4502819776535034, "step": 1015, "valid_targets_mean": 2212.4, "valid_targets_min": 442 }, { "epoch": 1.6320000000000001, "grad_norm": 0.6173544056476045, "learning_rate": 3.651278071804186e-05, "loss": 0.4232, "loss_nan_ranks": 0, "loss_rank_avg": 0.4541641175746918, "step": 1020, "valid_targets_mean": 2676.3, "valid_targets_min": 2111 }, { "epoch": 1.6400000000000001, "grad_norm": 0.6326476054076543, "learning_rate": 3.646008406015291e-05, "loss": 0.4424, "loss_nan_ranks": 0, "loss_rank_avg": 0.42796561121940613, "step": 1025, "valid_targets_mean": 2486.1, "valid_targets_min": 967 }, { "epoch": 1.6480000000000001, "grad_norm": 0.6216395857761358, "learning_rate": 3.6407030849781475e-05, "loss": 0.433, "loss_nan_ranks": 0, "loss_rank_avg": 0.44164803624153137, "step": 1030, "valid_targets_mean": 2380.2, "valid_targets_min": 883 }, { "epoch": 1.6560000000000001, "grad_norm": 0.5707144839810783, "learning_rate": 3.635362223614733e-05, "loss": 0.4237, "loss_nan_ranks": 0, "loss_rank_avg": 0.402190625667572, "step": 1035, "valid_targets_mean": 2646.9, "valid_targets_min": 875 }, { "epoch": 1.6640000000000001, "grad_norm": 0.6326893009965505, "learning_rate": 3.629985937616884e-05, "loss": 0.4299, "loss_nan_ranks": 0, "loss_rank_avg": 0.4385002553462982, "step": 1040, "valid_targets_mean": 2627.1, "valid_targets_min": 657 }, { "epoch": 1.6720000000000002, "grad_norm": 0.6175015547121943, "learning_rate": 3.624574343443794e-05, "loss": 0.4271, "loss_nan_ranks": 0, "loss_rank_avg": 0.4224476218223572, "step": 1045, "valid_targets_mean": 2436.2, "valid_targets_min": 776 }, { "epoch": 1.6800000000000002, "grad_norm": 0.6235914122987322, "learning_rate": 3.619127558319492e-05, "loss": 0.4613, "loss_nan_ranks": 0, "loss_rank_avg": 0.46333080530166626, "step": 1050, "valid_targets_mean": 2319.1, "valid_targets_min": 1166 }, { "epoch": 1.688, "grad_norm": 0.6265949309393127, "learning_rate": 3.613645700230298e-05, "loss": 0.4432, "loss_nan_ranks": 0, "loss_rank_avg": 0.46041712164878845, "step": 1055, "valid_targets_mean": 2153.2, "valid_targets_min": 659 }, { "epoch": 1.696, "grad_norm": 0.5655935170182097, "learning_rate": 3.6081288879222696e-05, "loss": 0.44, "loss_nan_ranks": 0, "loss_rank_avg": 0.40793120861053467, "step": 1060, "valid_targets_mean": 2697.2, "valid_targets_min": 612 }, { "epoch": 1.704, "grad_norm": 0.6618385447827254, "learning_rate": 3.602577240898633e-05, "loss": 0.4336, "loss_nan_ranks": 0, "loss_rank_avg": 0.4535277485847473, "step": 1065, "valid_targets_mean": 2301.0, "valid_targets_min": 387 }, { "epoch": 1.712, "grad_norm": 0.5771516778336349, "learning_rate": 3.596990879417188e-05, "loss": 0.4261, "loss_nan_ranks": 0, "loss_rank_avg": 0.40155816078186035, "step": 1070, "valid_targets_mean": 2554.8, "valid_targets_min": 612 }, { "epoch": 1.72, "grad_norm": 0.6882606659694092, "learning_rate": 3.591369924487711e-05, "loss": 0.4251, "loss_nan_ranks": 0, "loss_rank_avg": 0.4629209041595459, "step": 1075, "valid_targets_mean": 1866.8, "valid_targets_min": 558 }, { "epoch": 1.728, "grad_norm": 0.6766607679488811, "learning_rate": 3.585714497869326e-05, "loss": 0.4374, "loss_nan_ranks": 0, "loss_rank_avg": 0.45873624086380005, "step": 1080, "valid_targets_mean": 2035.2, "valid_targets_min": 580 }, { "epoch": 1.736, "grad_norm": 0.589073541077514, "learning_rate": 3.580024722067872e-05, "loss": 0.4226, "loss_nan_ranks": 0, "loss_rank_avg": 0.4332297444343567, "step": 1085, "valid_targets_mean": 2441.6, "valid_targets_min": 604 }, { "epoch": 1.744, "grad_norm": 0.6024297435143611, "learning_rate": 3.574300720333247e-05, "loss": 0.4235, "loss_nan_ranks": 0, "loss_rank_avg": 0.44380128383636475, "step": 1090, "valid_targets_mean": 2427.1, "valid_targets_min": 944 }, { "epoch": 1.752, "grad_norm": 0.5863852076388184, "learning_rate": 3.568542616656739e-05, "loss": 0.4374, "loss_nan_ranks": 0, "loss_rank_avg": 0.4471275210380554, "step": 1095, "valid_targets_mean": 2633.6, "valid_targets_min": 1157 }, { "epoch": 1.76, "grad_norm": 0.5884843210677144, "learning_rate": 3.5627505357683404e-05, "loss": 0.4324, "loss_nan_ranks": 0, "loss_rank_avg": 0.42342448234558105, "step": 1100, "valid_targets_mean": 2645.6, "valid_targets_min": 627 }, { "epoch": 1.768, "grad_norm": 0.6645389429055254, "learning_rate": 3.5569246031340474e-05, "loss": 0.439, "loss_nan_ranks": 0, "loss_rank_avg": 0.4746902585029602, "step": 1105, "valid_targets_mean": 2341.7, "valid_targets_min": 628 }, { "epoch": 1.776, "grad_norm": 0.5966469996066948, "learning_rate": 3.5510649449531375e-05, "loss": 0.4271, "loss_nan_ranks": 0, "loss_rank_avg": 0.42743945121765137, "step": 1110, "valid_targets_mean": 2537.6, "valid_targets_min": 643 }, { "epoch": 1.784, "grad_norm": 0.5878248615166197, "learning_rate": 3.545171688155441e-05, "loss": 0.4213, "loss_nan_ranks": 0, "loss_rank_avg": 0.430372029542923, "step": 1115, "valid_targets_mean": 2542.7, "valid_targets_min": 1068 }, { "epoch": 1.792, "grad_norm": 0.6343590604430001, "learning_rate": 3.5392449603985894e-05, "loss": 0.4437, "loss_nan_ranks": 0, "loss_rank_avg": 0.43726491928100586, "step": 1120, "valid_targets_mean": 2325.7, "valid_targets_min": 676 }, { "epoch": 1.8, "grad_norm": 0.536815346738192, "learning_rate": 3.53328489006525e-05, "loss": 0.4202, "loss_nan_ranks": 0, "loss_rank_avg": 0.3983931243419647, "step": 1125, "valid_targets_mean": 2698.4, "valid_targets_min": 1589 }, { "epoch": 1.808, "grad_norm": 0.5844130028956325, "learning_rate": 3.527291606260345e-05, "loss": 0.4146, "loss_nan_ranks": 0, "loss_rank_avg": 0.38655996322631836, "step": 1130, "valid_targets_mean": 2262.7, "valid_targets_min": 926 }, { "epoch": 1.8159999999999998, "grad_norm": 0.6433258701700891, "learning_rate": 3.521265238808255e-05, "loss": 0.4425, "loss_nan_ranks": 0, "loss_rank_avg": 0.42823395133018494, "step": 1135, "valid_targets_mean": 1968.8, "valid_targets_min": 1033 }, { "epoch": 1.8239999999999998, "grad_norm": 0.582115811790589, "learning_rate": 3.515205918250007e-05, "loss": 0.4405, "loss_nan_ranks": 0, "loss_rank_avg": 0.43498510122299194, "step": 1140, "valid_targets_mean": 2542.1, "valid_targets_min": 702 }, { "epoch": 1.8319999999999999, "grad_norm": 0.6326850159151952, "learning_rate": 3.5091137758404456e-05, "loss": 0.4476, "loss_nan_ranks": 0, "loss_rank_avg": 0.44848906993865967, "step": 1145, "valid_targets_mean": 2340.6, "valid_targets_min": 945 }, { "epoch": 1.8399999999999999, "grad_norm": 1.5302422103990057, "learning_rate": 3.5029889435453924e-05, "loss": 0.4198, "loss_nan_ranks": 0, "loss_rank_avg": 0.45762234926223755, "step": 1150, "valid_targets_mean": 2221.9, "valid_targets_min": 595 }, { "epoch": 1.8479999999999999, "grad_norm": 0.4970864914525463, "learning_rate": 3.496831554038784e-05, "loss": 0.4112, "loss_nan_ranks": 0, "loss_rank_avg": 0.3754098117351532, "step": 1155, "valid_targets_mean": 3324.8, "valid_targets_min": 626 }, { "epoch": 1.8559999999999999, "grad_norm": 0.66895938733825, "learning_rate": 3.490641740699801e-05, "loss": 0.4285, "loss_nan_ranks": 0, "loss_rank_avg": 0.4742193818092346, "step": 1160, "valid_targets_mean": 2048.9, "valid_targets_min": 619 }, { "epoch": 1.8639999999999999, "grad_norm": 0.5433939301259606, "learning_rate": 3.484419637609977e-05, "loss": 0.4266, "loss_nan_ranks": 0, "loss_rank_avg": 0.40021824836730957, "step": 1165, "valid_targets_mean": 2779.8, "valid_targets_min": 1797 }, { "epoch": 1.8719999999999999, "grad_norm": 1.0415130550227225, "learning_rate": 3.478165379550292e-05, "loss": 0.4339, "loss_nan_ranks": 0, "loss_rank_avg": 0.4287850260734558, "step": 1170, "valid_targets_mean": 2352.6, "valid_targets_min": 833 }, { "epoch": 1.88, "grad_norm": 0.5923170984142792, "learning_rate": 3.471879101998262e-05, "loss": 0.4601, "loss_nan_ranks": 0, "loss_rank_avg": 0.44315701723098755, "step": 1175, "valid_targets_mean": 2611.9, "valid_targets_min": 1523 }, { "epoch": 1.888, "grad_norm": 0.6483765666231087, "learning_rate": 3.465560941124992e-05, "loss": 0.4403, "loss_nan_ranks": 0, "loss_rank_avg": 0.484798789024353, "step": 1180, "valid_targets_mean": 2279.2, "valid_targets_min": 738 }, { "epoch": 1.896, "grad_norm": 0.5219942435392709, "learning_rate": 3.459211033792233e-05, "loss": 0.431, "loss_nan_ranks": 0, "loss_rank_avg": 0.41061511635780334, "step": 1185, "valid_targets_mean": 2872.6, "valid_targets_min": 730 }, { "epoch": 1.904, "grad_norm": 0.6395729031124877, "learning_rate": 3.4528295175494194e-05, "loss": 0.4394, "loss_nan_ranks": 0, "loss_rank_avg": 0.4299981892108917, "step": 1190, "valid_targets_mean": 2462.9, "valid_targets_min": 796 }, { "epoch": 1.912, "grad_norm": 0.5643734986587181, "learning_rate": 3.4464165306306845e-05, "loss": 0.435, "loss_nan_ranks": 0, "loss_rank_avg": 0.400689959526062, "step": 1195, "valid_targets_mean": 2524.4, "valid_targets_min": 574 }, { "epoch": 1.92, "grad_norm": 0.7243790486340214, "learning_rate": 3.4399722119518675e-05, "loss": 0.4532, "loss_nan_ranks": 0, "loss_rank_avg": 0.4652594029903412, "step": 1200, "valid_targets_mean": 2036.8, "valid_targets_min": 848 }, { "epoch": 1.928, "grad_norm": 0.5478422029867802, "learning_rate": 3.433496701107506e-05, "loss": 0.4267, "loss_nan_ranks": 0, "loss_rank_avg": 0.3938635587692261, "step": 1205, "valid_targets_mean": 2800.8, "valid_targets_min": 1483 }, { "epoch": 1.936, "grad_norm": 0.6541759373256487, "learning_rate": 3.426990138367813e-05, "loss": 0.4301, "loss_nan_ranks": 0, "loss_rank_avg": 0.45855993032455444, "step": 1210, "valid_targets_mean": 2101.4, "valid_targets_min": 870 }, { "epoch": 1.944, "grad_norm": 0.6136604495164271, "learning_rate": 3.420452664675633e-05, "loss": 0.4336, "loss_nan_ranks": 0, "loss_rank_avg": 0.4466904103755951, "step": 1215, "valid_targets_mean": 2156.8, "valid_targets_min": 624 }, { "epoch": 1.952, "grad_norm": 0.6311990378635277, "learning_rate": 3.4138844216433946e-05, "loss": 0.4371, "loss_nan_ranks": 0, "loss_rank_avg": 0.4521297812461853, "step": 1220, "valid_targets_mean": 2365.5, "valid_targets_min": 1199 }, { "epoch": 1.96, "grad_norm": 0.5991130836480716, "learning_rate": 3.407285551550041e-05, "loss": 0.4384, "loss_nan_ranks": 0, "loss_rank_avg": 0.44246920943260193, "step": 1225, "valid_targets_mean": 2326.0, "valid_targets_min": 1000 }, { "epoch": 1.968, "grad_norm": 0.672967674746338, "learning_rate": 3.4006561973379466e-05, "loss": 0.4581, "loss_nan_ranks": 0, "loss_rank_avg": 0.4707268476486206, "step": 1230, "valid_targets_mean": 2091.6, "valid_targets_min": 631 }, { "epoch": 1.976, "grad_norm": 0.5791823883862799, "learning_rate": 3.3939965026098245e-05, "loss": 0.4197, "loss_nan_ranks": 0, "loss_rank_avg": 0.4041367173194885, "step": 1235, "valid_targets_mean": 2632.2, "valid_targets_min": 894 }, { "epoch": 1.984, "grad_norm": 0.5504582884768956, "learning_rate": 3.38730661162561e-05, "loss": 0.432, "loss_nan_ranks": 0, "loss_rank_avg": 0.38332805037498474, "step": 1240, "valid_targets_mean": 2613.8, "valid_targets_min": 1539 }, { "epoch": 1.992, "grad_norm": 0.6019398759021497, "learning_rate": 3.3805866692993414e-05, "loss": 0.441, "loss_nan_ranks": 0, "loss_rank_avg": 0.42343538999557495, "step": 1245, "valid_targets_mean": 2393.1, "valid_targets_min": 832 }, { "epoch": 2.0, "grad_norm": 0.6142721800773309, "learning_rate": 3.373836821196018e-05, "loss": 0.431, "loss_nan_ranks": 0, "loss_rank_avg": 0.4213658273220062, "step": 1250, "valid_targets_mean": 2225.6, "valid_targets_min": 809 }, { "epoch": 2.008, "grad_norm": 0.566984260838742, "learning_rate": 3.3670572135284456e-05, "loss": 0.4041, "loss_nan_ranks": 0, "loss_rank_avg": 0.3673563599586487, "step": 1255, "valid_targets_mean": 2545.2, "valid_targets_min": 1030 }, { "epoch": 2.016, "grad_norm": 0.6282588632444733, "learning_rate": 3.360247993154073e-05, "loss": 0.4094, "loss_nan_ranks": 0, "loss_rank_avg": 0.39245718717575073, "step": 1260, "valid_targets_mean": 2090.2, "valid_targets_min": 774 }, { "epoch": 2.024, "grad_norm": 0.68740299633641, "learning_rate": 3.35340930757181e-05, "loss": 0.4212, "loss_nan_ranks": 0, "loss_rank_avg": 0.3981652557849884, "step": 1265, "valid_targets_mean": 2120.9, "valid_targets_min": 762 }, { "epoch": 2.032, "grad_norm": 0.5831769575223239, "learning_rate": 3.3465413049188276e-05, "loss": 0.4032, "loss_nan_ranks": 0, "loss_rank_avg": 0.3730853199958801, "step": 1270, "valid_targets_mean": 2617.3, "valid_targets_min": 1533 }, { "epoch": 2.04, "grad_norm": 0.6249850110520228, "learning_rate": 3.3396441339673564e-05, "loss": 0.4142, "loss_nan_ranks": 0, "loss_rank_avg": 0.4278792142868042, "step": 1275, "valid_targets_mean": 2506.9, "valid_targets_min": 673 }, { "epoch": 2.048, "grad_norm": 0.6088510037795549, "learning_rate": 3.3327179441214574e-05, "loss": 0.405, "loss_nan_ranks": 0, "loss_rank_avg": 0.42335671186447144, "step": 1280, "valid_targets_mean": 2289.5, "valid_targets_min": 1072 }, { "epoch": 2.056, "grad_norm": 0.5728322801056469, "learning_rate": 3.325762885413791e-05, "loss": 0.4085, "loss_nan_ranks": 0, "loss_rank_avg": 0.4096108078956604, "step": 1285, "valid_targets_mean": 2627.4, "valid_targets_min": 1331 }, { "epoch": 2.064, "grad_norm": 0.6842908305749249, "learning_rate": 3.318779108502362e-05, "loss": 0.4148, "loss_nan_ranks": 0, "loss_rank_avg": 0.43703195452690125, "step": 1290, "valid_targets_mean": 2156.1, "valid_targets_min": 786 }, { "epoch": 2.072, "grad_norm": 0.6447442725403991, "learning_rate": 3.3117667646672616e-05, "loss": 0.3979, "loss_nan_ranks": 0, "loss_rank_avg": 0.3940478265285492, "step": 1295, "valid_targets_mean": 2325.4, "valid_targets_min": 740 }, { "epoch": 2.08, "grad_norm": 0.6428951801040104, "learning_rate": 3.304726005807386e-05, "loss": 0.3826, "loss_nan_ranks": 0, "loss_rank_avg": 0.40893763303756714, "step": 1300, "valid_targets_mean": 2247.2, "valid_targets_min": 688 }, { "epoch": 2.088, "grad_norm": 0.6135025402919587, "learning_rate": 3.297656984437148e-05, "loss": 0.4101, "loss_nan_ranks": 0, "loss_rank_avg": 0.3879171311855316, "step": 1305, "valid_targets_mean": 2372.0, "valid_targets_min": 639 }, { "epoch": 2.096, "grad_norm": 0.6652936744353024, "learning_rate": 3.2905598536831715e-05, "loss": 0.4239, "loss_nan_ranks": 0, "loss_rank_avg": 0.44061246514320374, "step": 1310, "valid_targets_mean": 2154.2, "valid_targets_min": 743 }, { "epoch": 2.104, "grad_norm": 0.6523375950116365, "learning_rate": 3.2834347672809776e-05, "loss": 0.3928, "loss_nan_ranks": 0, "loss_rank_avg": 0.41073179244995117, "step": 1315, "valid_targets_mean": 2811.1, "valid_targets_min": 1831 }, { "epoch": 2.112, "grad_norm": 0.6077762062428691, "learning_rate": 3.276281879571651e-05, "loss": 0.4217, "loss_nan_ranks": 0, "loss_rank_avg": 0.39779356122016907, "step": 1320, "valid_targets_mean": 2443.5, "valid_targets_min": 855 }, { "epoch": 2.12, "grad_norm": 0.5793040239747499, "learning_rate": 3.2691013454985006e-05, "loss": 0.4044, "loss_nan_ranks": 0, "loss_rank_avg": 0.3803567886352539, "step": 1325, "valid_targets_mean": 2550.6, "valid_targets_min": 786 }, { "epoch": 2.128, "grad_norm": 0.6680476265501086, "learning_rate": 3.2618933206036994e-05, "loss": 0.3901, "loss_nan_ranks": 0, "loss_rank_avg": 0.4052245020866394, "step": 1330, "valid_targets_mean": 2341.2, "valid_targets_min": 776 }, { "epoch": 2.136, "grad_norm": 0.6492406169897352, "learning_rate": 3.2546579610249177e-05, "loss": 0.4064, "loss_nan_ranks": 0, "loss_rank_avg": 0.42024415731430054, "step": 1335, "valid_targets_mean": 2130.4, "valid_targets_min": 633 }, { "epoch": 2.144, "grad_norm": 0.6204846916763979, "learning_rate": 3.2473954234919386e-05, "loss": 0.4137, "loss_nan_ranks": 0, "loss_rank_avg": 0.41287142038345337, "step": 1340, "valid_targets_mean": 2284.4, "valid_targets_min": 628 }, { "epoch": 2.152, "grad_norm": 0.6233759211732591, "learning_rate": 3.240105865323266e-05, "loss": 0.4232, "loss_nan_ranks": 0, "loss_rank_avg": 0.4033377170562744, "step": 1345, "valid_targets_mean": 2149.9, "valid_targets_min": 522 }, { "epoch": 2.16, "grad_norm": 0.5895554499438427, "learning_rate": 3.232789444422714e-05, "loss": 0.3885, "loss_nan_ranks": 0, "loss_rank_avg": 0.3827977776527405, "step": 1350, "valid_targets_mean": 2620.0, "valid_targets_min": 1318 }, { "epoch": 2.168, "grad_norm": 0.5777062788179502, "learning_rate": 3.225446319275988e-05, "loss": 0.4128, "loss_nan_ranks": 0, "loss_rank_avg": 0.383849561214447, "step": 1355, "valid_targets_mean": 2806.8, "valid_targets_min": 1167 }, { "epoch": 2.176, "grad_norm": 0.6442434899214516, "learning_rate": 3.218076648947251e-05, "loss": 0.4079, "loss_nan_ranks": 0, "loss_rank_avg": 0.4065432548522949, "step": 1360, "valid_targets_mean": 2231.4, "valid_targets_min": 1305 }, { "epoch": 2.184, "grad_norm": 0.6501192972774998, "learning_rate": 3.2106805930756804e-05, "loss": 0.4055, "loss_nan_ranks": 0, "loss_rank_avg": 0.42025482654571533, "step": 1365, "valid_targets_mean": 2148.1, "valid_targets_min": 670 }, { "epoch": 2.192, "grad_norm": 0.6526241135703132, "learning_rate": 3.2032583118720045e-05, "loss": 0.4223, "loss_nan_ranks": 0, "loss_rank_avg": 0.42404836416244507, "step": 1370, "valid_targets_mean": 2050.2, "valid_targets_min": 627 }, { "epoch": 2.2, "grad_norm": 0.6417831053568079, "learning_rate": 3.195809966115038e-05, "loss": 0.4099, "loss_nan_ranks": 0, "loss_rank_avg": 0.41074275970458984, "step": 1375, "valid_targets_mean": 2165.0, "valid_targets_min": 670 }, { "epoch": 2.208, "grad_norm": 0.6858826218890438, "learning_rate": 3.188335717148195e-05, "loss": 0.4048, "loss_nan_ranks": 0, "loss_rank_avg": 0.44892221689224243, "step": 1380, "valid_targets_mean": 2091.4, "valid_targets_min": 417 }, { "epoch": 2.216, "grad_norm": 0.5964347328155009, "learning_rate": 3.1808357268759964e-05, "loss": 0.4004, "loss_nan_ranks": 0, "loss_rank_avg": 0.40602582693099976, "step": 1385, "valid_targets_mean": 2580.9, "valid_targets_min": 777 }, { "epoch": 2.224, "grad_norm": 0.6055478522763069, "learning_rate": 3.173310157760563e-05, "loss": 0.4104, "loss_nan_ranks": 0, "loss_rank_avg": 0.4049045145511627, "step": 1390, "valid_targets_mean": 2326.7, "valid_targets_min": 618 }, { "epoch": 2.232, "grad_norm": 0.5682560854626032, "learning_rate": 3.165759172818093e-05, "loss": 0.3807, "loss_nan_ranks": 0, "loss_rank_avg": 0.38516765832901, "step": 1395, "valid_targets_mean": 2537.3, "valid_targets_min": 989 }, { "epoch": 2.24, "grad_norm": 0.6299417028086615, "learning_rate": 3.158182935615336e-05, "loss": 0.4026, "loss_nan_ranks": 0, "loss_rank_avg": 0.40374094247817993, "step": 1400, "valid_targets_mean": 2275.2, "valid_targets_min": 637 }, { "epoch": 2.248, "grad_norm": 0.6135628464170894, "learning_rate": 3.150581610266046e-05, "loss": 0.4128, "loss_nan_ranks": 0, "loss_rank_avg": 0.40207067131996155, "step": 1405, "valid_targets_mean": 2404.6, "valid_targets_min": 471 }, { "epoch": 2.2560000000000002, "grad_norm": 0.5163121688960695, "learning_rate": 3.1429553614274256e-05, "loss": 0.3811, "loss_nan_ranks": 0, "loss_rank_avg": 0.34493958950042725, "step": 1410, "valid_targets_mean": 2794.7, "valid_targets_min": 1072 }, { "epoch": 2.2640000000000002, "grad_norm": 0.5797314117990449, "learning_rate": 3.1353043542965636e-05, "loss": 0.395, "loss_nan_ranks": 0, "loss_rank_avg": 0.3778391480445862, "step": 1415, "valid_targets_mean": 2505.6, "valid_targets_min": 1141 }, { "epoch": 2.2720000000000002, "grad_norm": 0.6351152541067143, "learning_rate": 3.1276287546068536e-05, "loss": 0.4089, "loss_nan_ranks": 0, "loss_rank_avg": 0.4025972783565521, "step": 1420, "valid_targets_mean": 2322.2, "valid_targets_min": 762 }, { "epoch": 2.2800000000000002, "grad_norm": 0.6347905422148297, "learning_rate": 3.1199287286244047e-05, "loss": 0.3766, "loss_nan_ranks": 0, "loss_rank_avg": 0.42649704217910767, "step": 1425, "valid_targets_mean": 2317.1, "valid_targets_min": 747 }, { "epoch": 2.288, "grad_norm": 0.6944841579796738, "learning_rate": 3.112204443144438e-05, "loss": 0.4135, "loss_nan_ranks": 0, "loss_rank_avg": 0.44431525468826294, "step": 1430, "valid_targets_mean": 2030.6, "valid_targets_min": 747 }, { "epoch": 2.296, "grad_norm": 0.5977702075080438, "learning_rate": 3.1044560654876775e-05, "loss": 0.4128, "loss_nan_ranks": 0, "loss_rank_avg": 0.4044709801673889, "step": 1435, "valid_targets_mean": 2327.9, "valid_targets_min": 689 }, { "epoch": 2.304, "grad_norm": 0.6377348502580388, "learning_rate": 3.0966837634967215e-05, "loss": 0.4011, "loss_nan_ranks": 0, "loss_rank_avg": 0.42452406883239746, "step": 1440, "valid_targets_mean": 2312.2, "valid_targets_min": 643 }, { "epoch": 2.312, "grad_norm": 0.543483855236602, "learning_rate": 3.088887705532409e-05, "loss": 0.4199, "loss_nan_ranks": 0, "loss_rank_avg": 0.3883250951766968, "step": 1445, "valid_targets_mean": 2985.1, "valid_targets_min": 1064 }, { "epoch": 2.32, "grad_norm": 0.6436537174269752, "learning_rate": 3.081068060470174e-05, "loss": 0.4015, "loss_nan_ranks": 0, "loss_rank_avg": 0.399660587310791, "step": 1450, "valid_targets_mean": 2157.8, "valid_targets_min": 635 }, { "epoch": 2.328, "grad_norm": 0.6554604539338741, "learning_rate": 3.073224997696385e-05, "loss": 0.4073, "loss_nan_ranks": 0, "loss_rank_avg": 0.42677703499794006, "step": 1455, "valid_targets_mean": 2295.5, "valid_targets_min": 718 }, { "epoch": 2.336, "grad_norm": 0.6275787273732197, "learning_rate": 3.065358687104675e-05, "loss": 0.3884, "loss_nan_ranks": 0, "loss_rank_avg": 0.4116145372390747, "step": 1460, "valid_targets_mean": 2349.5, "valid_targets_min": 670 }, { "epoch": 2.344, "grad_norm": 0.7507823236318536, "learning_rate": 3.057469299092264e-05, "loss": 0.4285, "loss_nan_ranks": 0, "loss_rank_avg": 0.4097451865673065, "step": 1465, "valid_targets_mean": 2530.2, "valid_targets_min": 741 }, { "epoch": 2.352, "grad_norm": 0.5594297562132423, "learning_rate": 3.0495570045562686e-05, "loss": 0.4003, "loss_nan_ranks": 0, "loss_rank_avg": 0.37826666235923767, "step": 1470, "valid_targets_mean": 2806.1, "valid_targets_min": 1147 }, { "epoch": 2.36, "grad_norm": 0.5374966128720836, "learning_rate": 3.041621974889996e-05, "loss": 0.3869, "loss_nan_ranks": 0, "loss_rank_avg": 0.36974743008613586, "step": 1475, "valid_targets_mean": 2765.7, "valid_targets_min": 943 }, { "epoch": 2.368, "grad_norm": 0.7071950629766613, "learning_rate": 3.0336643819792342e-05, "loss": 0.3986, "loss_nan_ranks": 0, "loss_rank_avg": 0.44906729459762573, "step": 1480, "valid_targets_mean": 1984.7, "valid_targets_min": 712 }, { "epoch": 2.376, "grad_norm": 0.6274520592565033, "learning_rate": 3.0256843981985295e-05, "loss": 0.4204, "loss_nan_ranks": 0, "loss_rank_avg": 0.45014089345932007, "step": 1485, "valid_targets_mean": 2480.5, "valid_targets_min": 1369 }, { "epoch": 2.384, "grad_norm": 0.6682182125144448, "learning_rate": 3.0176821964074503e-05, "loss": 0.4211, "loss_nan_ranks": 0, "loss_rank_avg": 0.4411892294883728, "step": 1490, "valid_targets_mean": 2145.0, "valid_targets_min": 627 }, { "epoch": 2.392, "grad_norm": 0.631848940072248, "learning_rate": 3.009657949946844e-05, "loss": 0.4099, "loss_nan_ranks": 0, "loss_rank_avg": 0.4174479842185974, "step": 1495, "valid_targets_mean": 2216.5, "valid_targets_min": 595 }, { "epoch": 2.4, "grad_norm": 0.586918339875622, "learning_rate": 3.00161183263508e-05, "loss": 0.4064, "loss_nan_ranks": 0, "loss_rank_avg": 0.38466623425483704, "step": 1500, "valid_targets_mean": 2567.8, "valid_targets_min": 1635 }, { "epoch": 2.408, "grad_norm": 0.5723168490058183, "learning_rate": 2.993544018764289e-05, "loss": 0.4067, "loss_nan_ranks": 0, "loss_rank_avg": 0.3870210647583008, "step": 1505, "valid_targets_mean": 2610.5, "valid_targets_min": 619 }, { "epoch": 2.416, "grad_norm": 0.5238472137602374, "learning_rate": 2.9854546830965833e-05, "loss": 0.4087, "loss_nan_ranks": 0, "loss_rank_avg": 0.3635292649269104, "step": 1510, "valid_targets_mean": 2825.7, "valid_targets_min": 1135 }, { "epoch": 2.424, "grad_norm": 0.6061179858895165, "learning_rate": 2.9773440008602736e-05, "loss": 0.417, "loss_nan_ranks": 0, "loss_rank_avg": 0.4219356179237366, "step": 1515, "valid_targets_mean": 2323.0, "valid_targets_min": 567 }, { "epoch": 2.432, "grad_norm": 0.6339556380542279, "learning_rate": 2.96921214774607e-05, "loss": 0.3996, "loss_nan_ranks": 0, "loss_rank_avg": 0.4146285057067871, "step": 1520, "valid_targets_mean": 2218.8, "valid_targets_min": 712 }, { "epoch": 2.44, "grad_norm": 0.6064884477109023, "learning_rate": 2.9610592999032815e-05, "loss": 0.3976, "loss_nan_ranks": 0, "loss_rank_avg": 0.4029238224029541, "step": 1525, "valid_targets_mean": 2360.1, "valid_targets_min": 676 }, { "epoch": 2.448, "grad_norm": 0.6089595263918436, "learning_rate": 2.9528856339359973e-05, "loss": 0.4143, "loss_nan_ranks": 0, "loss_rank_avg": 0.42686522006988525, "step": 1530, "valid_targets_mean": 2517.9, "valid_targets_min": 747 }, { "epoch": 2.456, "grad_norm": 0.653020004015492, "learning_rate": 2.9446913268992588e-05, "loss": 0.4114, "loss_nan_ranks": 0, "loss_rank_avg": 0.4561653733253479, "step": 1535, "valid_targets_mean": 2295.3, "valid_targets_min": 1319 }, { "epoch": 2.464, "grad_norm": 0.6764309057796736, "learning_rate": 2.936476556295229e-05, "loss": 0.4172, "loss_nan_ranks": 0, "loss_rank_avg": 0.4331625998020172, "step": 1540, "valid_targets_mean": 2071.8, "valid_targets_min": 594 }, { "epoch": 2.472, "grad_norm": 0.6765172223358372, "learning_rate": 2.928241500069346e-05, "loss": 0.4172, "loss_nan_ranks": 0, "loss_rank_avg": 0.42376580834388733, "step": 1545, "valid_targets_mean": 2154.9, "valid_targets_min": 726 }, { "epoch": 2.48, "grad_norm": 0.6206273355585628, "learning_rate": 2.9199863366064655e-05, "loss": 0.4059, "loss_nan_ranks": 0, "loss_rank_avg": 0.41974562406539917, "step": 1550, "valid_targets_mean": 2765.5, "valid_targets_min": 948 }, { "epoch": 2.488, "grad_norm": 0.7048376150455586, "learning_rate": 2.9117112447270007e-05, "loss": 0.4254, "loss_nan_ranks": 0, "loss_rank_avg": 0.4536668658256531, "step": 1555, "valid_targets_mean": 1862.8, "valid_targets_min": 624 }, { "epoch": 2.496, "grad_norm": 0.6011963836992713, "learning_rate": 2.9034164036830462e-05, "loss": 0.423, "loss_nan_ranks": 0, "loss_rank_avg": 0.3865543007850647, "step": 1560, "valid_targets_mean": 2565.4, "valid_targets_min": 567 }, { "epoch": 2.504, "grad_norm": 0.6152960192868094, "learning_rate": 2.8951019931544975e-05, "loss": 0.4229, "loss_nan_ranks": 0, "loss_rank_avg": 0.42501020431518555, "step": 1565, "valid_targets_mean": 2785.4, "valid_targets_min": 1467 }, { "epoch": 2.512, "grad_norm": 0.6151356387539295, "learning_rate": 2.8867681932451544e-05, "loss": 0.3862, "loss_nan_ranks": 0, "loss_rank_avg": 0.42337119579315186, "step": 1570, "valid_targets_mean": 2417.8, "valid_targets_min": 1060 }, { "epoch": 2.52, "grad_norm": 0.5911162679240172, "learning_rate": 2.8784151844788267e-05, "loss": 0.394, "loss_nan_ranks": 0, "loss_rank_avg": 0.38688942790031433, "step": 1575, "valid_targets_mean": 2411.8, "valid_targets_min": 597 }, { "epoch": 2.528, "grad_norm": 0.6284051957446064, "learning_rate": 2.8700431477954155e-05, "loss": 0.4184, "loss_nan_ranks": 0, "loss_rank_avg": 0.42191997170448303, "step": 1580, "valid_targets_mean": 2048.2, "valid_targets_min": 681 }, { "epoch": 2.536, "grad_norm": 0.7208603354315725, "learning_rate": 2.8616522645470012e-05, "loss": 0.4018, "loss_nan_ranks": 0, "loss_rank_avg": 0.4383850395679474, "step": 1585, "valid_targets_mean": 1752.1, "valid_targets_min": 570 }, { "epoch": 2.544, "grad_norm": 0.6112554844178338, "learning_rate": 2.8532427164939086e-05, "loss": 0.4121, "loss_nan_ranks": 0, "loss_rank_avg": 0.3988059163093567, "step": 1590, "valid_targets_mean": 2160.6, "valid_targets_min": 824 }, { "epoch": 2.552, "grad_norm": 0.5885746331220164, "learning_rate": 2.844814685800776e-05, "loss": 0.3962, "loss_nan_ranks": 0, "loss_rank_avg": 0.41117456555366516, "step": 1595, "valid_targets_mean": 2444.8, "valid_targets_min": 895 }, { "epoch": 2.56, "grad_norm": 0.6859235265208016, "learning_rate": 2.8363683550326028e-05, "loss": 0.4086, "loss_nan_ranks": 0, "loss_rank_avg": 0.42529648542404175, "step": 1600, "valid_targets_mean": 2296.4, "valid_targets_min": 511 }, { "epoch": 2.568, "grad_norm": 0.5933961450763576, "learning_rate": 2.8279039071508024e-05, "loss": 0.4134, "loss_nan_ranks": 0, "loss_rank_avg": 0.41135159134864807, "step": 1605, "valid_targets_mean": 2716.2, "valid_targets_min": 1675 }, { "epoch": 2.576, "grad_norm": 0.5973688684206384, "learning_rate": 2.81942152550923e-05, "loss": 0.4097, "loss_nan_ranks": 0, "loss_rank_avg": 0.38977688550949097, "step": 1610, "valid_targets_mean": 2420.4, "valid_targets_min": 732 }, { "epoch": 2.584, "grad_norm": 0.6640208566686371, "learning_rate": 2.810921393850219e-05, "loss": 0.4195, "loss_nan_ranks": 0, "loss_rank_avg": 0.4426608979701996, "step": 1615, "valid_targets_mean": 2348.2, "valid_targets_min": 1157 }, { "epoch": 2.592, "grad_norm": 0.664573850812217, "learning_rate": 2.802403696300595e-05, "loss": 0.4022, "loss_nan_ranks": 0, "loss_rank_avg": 0.42512786388397217, "step": 1620, "valid_targets_mean": 1862.1, "valid_targets_min": 891 }, { "epoch": 2.6, "grad_norm": 0.572539200427506, "learning_rate": 2.7938686173676915e-05, "loss": 0.4109, "loss_nan_ranks": 0, "loss_rank_avg": 0.39695966243743896, "step": 1625, "valid_targets_mean": 2581.4, "valid_targets_min": 872 }, { "epoch": 2.608, "grad_norm": 0.6499708727510073, "learning_rate": 2.7853163419353505e-05, "loss": 0.4196, "loss_nan_ranks": 0, "loss_rank_avg": 0.4214821457862854, "step": 1630, "valid_targets_mean": 2067.1, "valid_targets_min": 668 }, { "epoch": 2.616, "grad_norm": 0.6694649398545164, "learning_rate": 2.776747055259918e-05, "loss": 0.4264, "loss_nan_ranks": 0, "loss_rank_avg": 0.42351481318473816, "step": 1635, "valid_targets_mean": 2090.0, "valid_targets_min": 743 }, { "epoch": 2.624, "grad_norm": 0.6020293288838131, "learning_rate": 2.768160942966233e-05, "loss": 0.4113, "loss_nan_ranks": 0, "loss_rank_avg": 0.4066402018070221, "step": 1640, "valid_targets_mean": 2337.1, "valid_targets_min": 950 }, { "epoch": 2.632, "grad_norm": 0.6312642272158415, "learning_rate": 2.759558191043603e-05, "loss": 0.412, "loss_nan_ranks": 0, "loss_rank_avg": 0.41122761368751526, "step": 1645, "valid_targets_mean": 2409.6, "valid_targets_min": 634 }, { "epoch": 2.64, "grad_norm": 0.5658199590495661, "learning_rate": 2.7509389858417783e-05, "loss": 0.3912, "loss_nan_ranks": 0, "loss_rank_avg": 0.3818676173686981, "step": 1650, "valid_targets_mean": 2471.4, "valid_targets_min": 852 }, { "epoch": 2.648, "grad_norm": 0.6723251564057431, "learning_rate": 2.7423035140669147e-05, "loss": 0.4001, "loss_nan_ranks": 0, "loss_rank_avg": 0.3987196981906891, "step": 1655, "valid_targets_mean": 2053.6, "valid_targets_min": 560 }, { "epoch": 2.656, "grad_norm": 0.54755572109968, "learning_rate": 2.7336519627775288e-05, "loss": 0.4021, "loss_nan_ranks": 0, "loss_rank_avg": 0.38828593492507935, "step": 1660, "valid_targets_mean": 2746.6, "valid_targets_min": 779 }, { "epoch": 2.664, "grad_norm": 0.5448530396320009, "learning_rate": 2.724984519380444e-05, "loss": 0.4073, "loss_nan_ranks": 0, "loss_rank_avg": 0.3946529030799866, "step": 1665, "valid_targets_mean": 2920.2, "valid_targets_min": 1401 }, { "epoch": 2.672, "grad_norm": 0.703153417370805, "learning_rate": 2.7163013716267353e-05, "loss": 0.4169, "loss_nan_ranks": 0, "loss_rank_avg": 0.42749476432800293, "step": 1670, "valid_targets_mean": 2015.0, "valid_targets_min": 894 }, { "epoch": 2.68, "grad_norm": 0.6358114835875868, "learning_rate": 2.707602707607659e-05, "loss": 0.4087, "loss_nan_ranks": 0, "loss_rank_avg": 0.4138975739479065, "step": 1675, "valid_targets_mean": 2263.9, "valid_targets_min": 829 }, { "epoch": 2.6879999999999997, "grad_norm": 0.5591433391102716, "learning_rate": 2.6988887157505786e-05, "loss": 0.4208, "loss_nan_ranks": 0, "loss_rank_avg": 0.39621496200561523, "step": 1680, "valid_targets_mean": 2720.2, "valid_targets_min": 1269 }, { "epoch": 2.6959999999999997, "grad_norm": 0.6060961673596665, "learning_rate": 2.6901595848148842e-05, "loss": 0.3999, "loss_nan_ranks": 0, "loss_rank_avg": 0.4340288043022156, "step": 1685, "valid_targets_mean": 2491.8, "valid_targets_min": 755 }, { "epoch": 2.7039999999999997, "grad_norm": 0.5969591273634783, "learning_rate": 2.681415503887904e-05, "loss": 0.4123, "loss_nan_ranks": 0, "loss_rank_avg": 0.40641626715660095, "step": 1690, "valid_targets_mean": 2428.6, "valid_targets_min": 754 }, { "epoch": 2.7119999999999997, "grad_norm": 0.5988970263104237, "learning_rate": 2.672656662380805e-05, "loss": 0.3947, "loss_nan_ranks": 0, "loss_rank_avg": 0.4131797254085541, "step": 1695, "valid_targets_mean": 2530.7, "valid_targets_min": 703 }, { "epoch": 2.7199999999999998, "grad_norm": 0.580739934515122, "learning_rate": 2.6638832500244967e-05, "loss": 0.4172, "loss_nan_ranks": 0, "loss_rank_avg": 0.40965735912323, "step": 1700, "valid_targets_mean": 2454.7, "valid_targets_min": 731 }, { "epoch": 2.7279999999999998, "grad_norm": 0.6069252169341895, "learning_rate": 2.655095456865514e-05, "loss": 0.4041, "loss_nan_ranks": 0, "loss_rank_avg": 0.41324254870414734, "step": 1705, "valid_targets_mean": 2458.7, "valid_targets_min": 1246 }, { "epoch": 2.7359999999999998, "grad_norm": 0.5956036832667821, "learning_rate": 2.6462934732619047e-05, "loss": 0.4, "loss_nan_ranks": 0, "loss_rank_avg": 0.40611159801483154, "step": 1710, "valid_targets_mean": 2482.6, "valid_targets_min": 1273 }, { "epoch": 2.7439999999999998, "grad_norm": 0.6184514343402461, "learning_rate": 2.6374774898791047e-05, "loss": 0.4061, "loss_nan_ranks": 0, "loss_rank_avg": 0.3789522349834442, "step": 1715, "valid_targets_mean": 2289.4, "valid_targets_min": 689 }, { "epoch": 2.752, "grad_norm": 0.6112887143892226, "learning_rate": 2.6286476976858084e-05, "loss": 0.4117, "loss_nan_ranks": 0, "loss_rank_avg": 0.4043063521385193, "step": 1720, "valid_targets_mean": 2493.1, "valid_targets_min": 779 }, { "epoch": 2.76, "grad_norm": 0.6124760782362094, "learning_rate": 2.619804287949831e-05, "loss": 0.4078, "loss_nan_ranks": 0, "loss_rank_avg": 0.4235222041606903, "step": 1725, "valid_targets_mean": 2521.1, "valid_targets_min": 693 }, { "epoch": 2.768, "grad_norm": 0.5227704496822333, "learning_rate": 2.6109474522339676e-05, "loss": 0.3775, "loss_nan_ranks": 0, "loss_rank_avg": 0.3500251770019531, "step": 1730, "valid_targets_mean": 2562.7, "valid_targets_min": 745 }, { "epoch": 2.776, "grad_norm": 0.5969991715645667, "learning_rate": 2.6020773823918414e-05, "loss": 0.4032, "loss_nan_ranks": 0, "loss_rank_avg": 0.37266242504119873, "step": 1735, "valid_targets_mean": 2158.2, "valid_targets_min": 575 }, { "epoch": 2.784, "grad_norm": 0.6267891785393893, "learning_rate": 2.5931942705637473e-05, "loss": 0.4021, "loss_nan_ranks": 0, "loss_rank_avg": 0.4024673402309418, "step": 1740, "valid_targets_mean": 2321.1, "valid_targets_min": 650 }, { "epoch": 2.792, "grad_norm": 0.6044184430625194, "learning_rate": 2.5842983091724923e-05, "loss": 0.4185, "loss_nan_ranks": 0, "loss_rank_avg": 0.42339223623275757, "step": 1745, "valid_targets_mean": 2422.4, "valid_targets_min": 933 }, { "epoch": 2.8, "grad_norm": 0.5784685545960938, "learning_rate": 2.575389690919226e-05, "loss": 0.408, "loss_nan_ranks": 0, "loss_rank_avg": 0.42380669713020325, "step": 1750, "valid_targets_mean": 2760.5, "valid_targets_min": 1768 }, { "epoch": 2.808, "grad_norm": 0.6316326299220579, "learning_rate": 2.5664686087792658e-05, "loss": 0.3924, "loss_nan_ranks": 0, "loss_rank_avg": 0.40810805559158325, "step": 1755, "valid_targets_mean": 2373.4, "valid_targets_min": 653 }, { "epoch": 2.816, "grad_norm": 0.6781625675287202, "learning_rate": 2.5575352559979188e-05, "loss": 0.4057, "loss_nan_ranks": 0, "loss_rank_avg": 0.3918643593788147, "step": 1760, "valid_targets_mean": 2486.4, "valid_targets_min": 604 }, { "epoch": 2.824, "grad_norm": 0.6149082105532682, "learning_rate": 2.5485898260862936e-05, "loss": 0.4044, "loss_nan_ranks": 0, "loss_rank_avg": 0.41537925601005554, "step": 1765, "valid_targets_mean": 2468.2, "valid_targets_min": 639 }, { "epoch": 2.832, "grad_norm": 0.6370806548762461, "learning_rate": 2.5396325128171072e-05, "loss": 0.4147, "loss_nan_ranks": 0, "loss_rank_avg": 0.39408478140830994, "step": 1770, "valid_targets_mean": 2442.0, "valid_targets_min": 608 }, { "epoch": 2.84, "grad_norm": 0.5719076536940244, "learning_rate": 2.5306635102204942e-05, "loss": 0.4089, "loss_nan_ranks": 0, "loss_rank_avg": 0.414425253868103, "step": 1775, "valid_targets_mean": 2648.8, "valid_targets_min": 1276 }, { "epoch": 2.848, "grad_norm": 0.619047617996578, "learning_rate": 2.5216830125797943e-05, "loss": 0.3797, "loss_nan_ranks": 0, "loss_rank_avg": 0.36515292525291443, "step": 1780, "valid_targets_mean": 2069.6, "valid_targets_min": 615 }, { "epoch": 2.856, "grad_norm": 0.6064877336124406, "learning_rate": 2.5126912144273517e-05, "loss": 0.4104, "loss_nan_ranks": 0, "loss_rank_avg": 0.4058312773704529, "step": 1785, "valid_targets_mean": 2320.6, "valid_targets_min": 471 }, { "epoch": 2.864, "grad_norm": 0.670022634304181, "learning_rate": 2.5036883105402985e-05, "loss": 0.4091, "loss_nan_ranks": 0, "loss_rank_avg": 0.40476682782173157, "step": 1790, "valid_targets_mean": 1988.9, "valid_targets_min": 631 }, { "epoch": 2.872, "grad_norm": 0.6044593976096089, "learning_rate": 2.4946744959363343e-05, "loss": 0.3981, "loss_nan_ranks": 0, "loss_rank_avg": 0.38838961720466614, "step": 1795, "valid_targets_mean": 2484.6, "valid_targets_min": 726 }, { "epoch": 2.88, "grad_norm": 0.614236708206963, "learning_rate": 2.4856499658695018e-05, "loss": 0.4101, "loss_nan_ranks": 0, "loss_rank_avg": 0.4145464301109314, "step": 1800, "valid_targets_mean": 2283.9, "valid_targets_min": 812 }, { "epoch": 2.888, "grad_norm": 0.8773740496005488, "learning_rate": 2.4766149158259603e-05, "loss": 0.3976, "loss_nan_ranks": 0, "loss_rank_avg": 0.3907817304134369, "step": 1805, "valid_targets_mean": 2194.6, "valid_targets_min": 508 }, { "epoch": 2.896, "grad_norm": 0.6018336839491192, "learning_rate": 2.4675695415197476e-05, "loss": 0.396, "loss_nan_ranks": 0, "loss_rank_avg": 0.4207257330417633, "step": 1810, "valid_targets_mean": 2481.9, "valid_targets_min": 735 }, { "epoch": 2.904, "grad_norm": 0.5685142459911705, "learning_rate": 2.458514038888543e-05, "loss": 0.4141, "loss_nan_ranks": 0, "loss_rank_avg": 0.40741056203842163, "step": 1815, "valid_targets_mean": 2612.0, "valid_targets_min": 887 }, { "epoch": 2.912, "grad_norm": 0.6983489922705232, "learning_rate": 2.4494486040894208e-05, "loss": 0.4072, "loss_nan_ranks": 0, "loss_rank_avg": 0.44186872243881226, "step": 1820, "valid_targets_mean": 2102.1, "valid_targets_min": 1287 }, { "epoch": 2.92, "grad_norm": 0.6147911556976061, "learning_rate": 2.440373433494603e-05, "loss": 0.4041, "loss_nan_ranks": 0, "loss_rank_avg": 0.45045363903045654, "step": 1825, "valid_targets_mean": 2486.1, "valid_targets_min": 523 }, { "epoch": 2.928, "grad_norm": 0.6271247387935147, "learning_rate": 2.4312887236872066e-05, "loss": 0.4171, "loss_nan_ranks": 0, "loss_rank_avg": 0.4281001091003418, "step": 1830, "valid_targets_mean": 2274.2, "valid_targets_min": 1405 }, { "epoch": 2.936, "grad_norm": 0.6532889026699363, "learning_rate": 2.4221946714569803e-05, "loss": 0.4191, "loss_nan_ranks": 0, "loss_rank_avg": 0.4529232084751129, "step": 1835, "valid_targets_mean": 2092.2, "valid_targets_min": 777 }, { "epoch": 2.944, "grad_norm": 0.6348842178072138, "learning_rate": 2.4130914737960472e-05, "loss": 0.4113, "loss_nan_ranks": 0, "loss_rank_avg": 0.3983311653137207, "step": 1840, "valid_targets_mean": 2224.6, "valid_targets_min": 755 }, { "epoch": 2.952, "grad_norm": 0.4937114027035953, "learning_rate": 2.4039793278946358e-05, "loss": 0.3708, "loss_nan_ranks": 0, "loss_rank_avg": 0.34252679347991943, "step": 1845, "valid_targets_mean": 2861.1, "valid_targets_min": 824 }, { "epoch": 2.96, "grad_norm": 0.623341775639461, "learning_rate": 2.394858431136806e-05, "loss": 0.4127, "loss_nan_ranks": 0, "loss_rank_avg": 0.4113304018974304, "step": 1850, "valid_targets_mean": 2284.8, "valid_targets_min": 645 }, { "epoch": 2.968, "grad_norm": 0.5683584704356505, "learning_rate": 2.385728981096178e-05, "loss": 0.403, "loss_nan_ranks": 0, "loss_rank_avg": 0.3914598822593689, "step": 1855, "valid_targets_mean": 2621.4, "valid_targets_min": 843 }, { "epoch": 2.976, "grad_norm": 0.5802616502103262, "learning_rate": 2.3765911755316503e-05, "loss": 0.4092, "loss_nan_ranks": 0, "loss_rank_avg": 0.3963446617126465, "step": 1860, "valid_targets_mean": 2564.2, "valid_targets_min": 617 }, { "epoch": 2.984, "grad_norm": 0.6163067326469784, "learning_rate": 2.3674452123831125e-05, "loss": 0.3884, "loss_nan_ranks": 0, "loss_rank_avg": 0.4104539155960083, "step": 1865, "valid_targets_mean": 2320.2, "valid_targets_min": 1112 }, { "epoch": 2.992, "grad_norm": 0.5391263340107644, "learning_rate": 2.358291289767165e-05, "loss": 0.4064, "loss_nan_ranks": 0, "loss_rank_avg": 0.3693543076515198, "step": 1870, "valid_targets_mean": 2771.8, "valid_targets_min": 1090 }, { "epoch": 3.0, "grad_norm": 0.6545761052990515, "learning_rate": 2.3491296059728202e-05, "loss": 0.411, "loss_nan_ranks": 0, "loss_rank_avg": 0.4166029393672943, "step": 1875, "valid_targets_mean": 2221.6, "valid_targets_min": 662 }, { "epoch": 3.008, "grad_norm": 0.5881439927848966, "learning_rate": 2.339960359457212e-05, "loss": 0.372, "loss_nan_ranks": 0, "loss_rank_avg": 0.37697720527648926, "step": 1880, "valid_targets_mean": 2365.9, "valid_targets_min": 945 }, { "epoch": 3.016, "grad_norm": 0.6063398287124605, "learning_rate": 2.3307837488412955e-05, "loss": 0.3772, "loss_nan_ranks": 0, "loss_rank_avg": 0.3653172254562378, "step": 1885, "valid_targets_mean": 2340.2, "valid_targets_min": 977 }, { "epoch": 3.024, "grad_norm": 0.5968531392776985, "learning_rate": 2.3215999729055437e-05, "loss": 0.3783, "loss_nan_ranks": 0, "loss_rank_avg": 0.36922895908355713, "step": 1890, "valid_targets_mean": 2414.5, "valid_targets_min": 590 }, { "epoch": 3.032, "grad_norm": 0.6375543970755811, "learning_rate": 2.312409230585641e-05, "loss": 0.376, "loss_nan_ranks": 0, "loss_rank_avg": 0.38601815700531006, "step": 1895, "valid_targets_mean": 2255.2, "valid_targets_min": 1107 }, { "epoch": 3.04, "grad_norm": 0.6060375860752494, "learning_rate": 2.3032117209681782e-05, "loss": 0.3874, "loss_nan_ranks": 0, "loss_rank_avg": 0.3774688243865967, "step": 1900, "valid_targets_mean": 2389.7, "valid_targets_min": 824 }, { "epoch": 3.048, "grad_norm": 0.5654895133707849, "learning_rate": 2.2940076432863335e-05, "loss": 0.3711, "loss_nan_ranks": 0, "loss_rank_avg": 0.36035019159317017, "step": 1905, "valid_targets_mean": 2650.9, "valid_targets_min": 779 }, { "epoch": 3.056, "grad_norm": 0.59516779658439, "learning_rate": 2.2847971969155626e-05, "loss": 0.397, "loss_nan_ranks": 0, "loss_rank_avg": 0.3954877257347107, "step": 1910, "valid_targets_mean": 2668.8, "valid_targets_min": 292 }, { "epoch": 3.064, "grad_norm": 0.6427203508454588, "learning_rate": 2.275580581369276e-05, "loss": 0.4007, "loss_nan_ranks": 0, "loss_rank_avg": 0.4016428589820862, "step": 1915, "valid_targets_mean": 2186.6, "valid_targets_min": 670 }, { "epoch": 3.072, "grad_norm": 0.6009841223748552, "learning_rate": 2.2663579962945205e-05, "loss": 0.3836, "loss_nan_ranks": 0, "loss_rank_avg": 0.36916252970695496, "step": 1920, "valid_targets_mean": 2420.2, "valid_targets_min": 477 }, { "epoch": 3.08, "grad_norm": 0.5884916954322104, "learning_rate": 2.2571296414676503e-05, "loss": 0.3648, "loss_nan_ranks": 0, "loss_rank_avg": 0.35220831632614136, "step": 1925, "valid_targets_mean": 2553.4, "valid_targets_min": 865 }, { "epoch": 3.088, "grad_norm": 0.6147850470764658, "learning_rate": 2.2478957167900038e-05, "loss": 0.3778, "loss_nan_ranks": 0, "loss_rank_avg": 0.3545624911785126, "step": 1930, "valid_targets_mean": 2651.2, "valid_targets_min": 906 }, { "epoch": 3.096, "grad_norm": 0.654324522879676, "learning_rate": 2.23865642228357e-05, "loss": 0.3808, "loss_nan_ranks": 0, "loss_rank_avg": 0.4201942980289459, "step": 1935, "valid_targets_mean": 2273.0, "valid_targets_min": 989 }, { "epoch": 3.104, "grad_norm": 0.6486633604347383, "learning_rate": 2.2294119580866592e-05, "loss": 0.3807, "loss_nan_ranks": 0, "loss_rank_avg": 0.37265709042549133, "step": 1940, "valid_targets_mean": 2489.9, "valid_targets_min": 747 }, { "epoch": 3.112, "grad_norm": 0.6212204162828813, "learning_rate": 2.2201625244495646e-05, "loss": 0.372, "loss_nan_ranks": 0, "loss_rank_avg": 0.386108934879303, "step": 1945, "valid_targets_mean": 2504.6, "valid_targets_min": 602 }, { "epoch": 3.12, "grad_norm": 0.6195173102616667, "learning_rate": 2.2109083217302242e-05, "loss": 0.3909, "loss_nan_ranks": 0, "loss_rank_avg": 0.3882253170013428, "step": 1950, "valid_targets_mean": 2608.1, "valid_targets_min": 1199 }, { "epoch": 3.128, "grad_norm": 0.601347688833568, "learning_rate": 2.201649550389885e-05, "loss": 0.3773, "loss_nan_ranks": 0, "loss_rank_avg": 0.37615832686424255, "step": 1955, "valid_targets_mean": 2454.6, "valid_targets_min": 812 }, { "epoch": 3.136, "grad_norm": 0.5855501844342232, "learning_rate": 2.1923864109887556e-05, "loss": 0.3892, "loss_nan_ranks": 0, "loss_rank_avg": 0.40102967619895935, "step": 1960, "valid_targets_mean": 2845.6, "valid_targets_min": 1626 }, { "epoch": 3.144, "grad_norm": 0.6462381753806768, "learning_rate": 2.1831191041816652e-05, "loss": 0.3798, "loss_nan_ranks": 0, "loss_rank_avg": 0.3896256685256958, "step": 1965, "valid_targets_mean": 2207.4, "valid_targets_min": 691 }, { "epoch": 3.152, "grad_norm": 0.6447232425326718, "learning_rate": 2.173847830713715e-05, "loss": 0.3921, "loss_nan_ranks": 0, "loss_rank_avg": 0.3892019987106323, "step": 1970, "valid_targets_mean": 2329.9, "valid_targets_min": 1223 }, { "epoch": 3.16, "grad_norm": 0.5537840504045047, "learning_rate": 2.1645727914159315e-05, "loss": 0.3646, "loss_nan_ranks": 0, "loss_rank_avg": 0.3495325446128845, "step": 1975, "valid_targets_mean": 2758.8, "valid_targets_min": 1192 }, { "epoch": 3.168, "grad_norm": 0.5569181058229623, "learning_rate": 2.1552941872009144e-05, "loss": 0.3859, "loss_nan_ranks": 0, "loss_rank_avg": 0.3637195825576782, "step": 1980, "valid_targets_mean": 2719.5, "valid_targets_min": 620 }, { "epoch": 3.176, "grad_norm": 0.6757754955399397, "learning_rate": 2.1460122190584868e-05, "loss": 0.4003, "loss_nan_ranks": 0, "loss_rank_avg": 0.44064268469810486, "step": 1985, "valid_targets_mean": 2298.9, "valid_targets_min": 1347 }, { "epoch": 3.184, "grad_norm": 0.6266172665957364, "learning_rate": 2.1367270880513377e-05, "loss": 0.3862, "loss_nan_ranks": 0, "loss_rank_avg": 0.3712347745895386, "step": 1990, "valid_targets_mean": 2372.4, "valid_targets_min": 678 }, { "epoch": 3.192, "grad_norm": 0.629095700387527, "learning_rate": 2.127438995310671e-05, "loss": 0.3919, "loss_nan_ranks": 0, "loss_rank_avg": 0.3881651759147644, "step": 1995, "valid_targets_mean": 2208.1, "valid_targets_min": 627 }, { "epoch": 3.2, "grad_norm": 0.6152198991611968, "learning_rate": 2.118148142031846e-05, "loss": 0.3856, "loss_nan_ranks": 0, "loss_rank_avg": 0.3625306785106659, "step": 2000, "valid_targets_mean": 2492.7, "valid_targets_min": 875 }, { "epoch": 3.208, "grad_norm": 0.6833342445465392, "learning_rate": 2.1088547294700182e-05, "loss": 0.3841, "loss_nan_ranks": 0, "loss_rank_avg": 0.4140799641609192, "step": 2005, "valid_targets_mean": 2277.3, "valid_targets_min": 701 }, { "epoch": 3.216, "grad_norm": 0.5798542496292155, "learning_rate": 2.0995589589357846e-05, "loss": 0.3796, "loss_nan_ranks": 0, "loss_rank_avg": 0.3835946321487427, "step": 2010, "valid_targets_mean": 2782.6, "valid_targets_min": 1224 }, { "epoch": 3.224, "grad_norm": 0.5664440967682081, "learning_rate": 2.0902610317908175e-05, "loss": 0.3621, "loss_nan_ranks": 0, "loss_rank_avg": 0.3766084313392639, "step": 2015, "valid_targets_mean": 2869.3, "valid_targets_min": 627 }, { "epoch": 3.232, "grad_norm": 0.6637622837999462, "learning_rate": 2.080961149443505e-05, "loss": 0.3888, "loss_nan_ranks": 0, "loss_rank_avg": 0.37104371190071106, "step": 2020, "valid_targets_mean": 2083.4, "valid_targets_min": 809 }, { "epoch": 3.24, "grad_norm": 0.7978545413525489, "learning_rate": 2.071659513344589e-05, "loss": 0.3606, "loss_nan_ranks": 0, "loss_rank_avg": 0.3885534703731537, "step": 2025, "valid_targets_mean": 2178.2, "valid_targets_min": 910 }, { "epoch": 3.248, "grad_norm": 0.6296550337826623, "learning_rate": 2.0623563249828e-05, "loss": 0.3943, "loss_nan_ranks": 0, "loss_rank_avg": 0.385465532541275, "step": 2030, "valid_targets_mean": 2537.3, "valid_targets_min": 598 }, { "epoch": 3.2560000000000002, "grad_norm": 0.5934206085224188, "learning_rate": 2.053051785880492e-05, "loss": 0.3702, "loss_nan_ranks": 0, "loss_rank_avg": 0.3774966299533844, "step": 2035, "valid_targets_mean": 2659.2, "valid_targets_min": 1175 }, { "epoch": 3.2640000000000002, "grad_norm": 0.7167344747974587, "learning_rate": 2.0437460975892814e-05, "loss": 0.401, "loss_nan_ranks": 0, "loss_rank_avg": 0.40590858459472656, "step": 2040, "valid_targets_mean": 1961.8, "valid_targets_min": 777 }, { "epoch": 3.2720000000000002, "grad_norm": 0.6267710623077165, "learning_rate": 2.0344394616856736e-05, "loss": 0.384, "loss_nan_ranks": 0, "loss_rank_avg": 0.38169512152671814, "step": 2045, "valid_targets_mean": 2491.3, "valid_targets_min": 1233 }, { "epoch": 3.2800000000000002, "grad_norm": 0.5745339719311472, "learning_rate": 2.0251320797667056e-05, "loss": 0.3775, "loss_nan_ranks": 0, "loss_rank_avg": 0.34669169783592224, "step": 2050, "valid_targets_mean": 2608.9, "valid_targets_min": 817 }, { "epoch": 3.288, "grad_norm": 0.6140351382017055, "learning_rate": 2.01582415344557e-05, "loss": 0.3787, "loss_nan_ranks": 0, "loss_rank_avg": 0.3617323935031891, "step": 2055, "valid_targets_mean": 2331.2, "valid_targets_min": 637 }, { "epoch": 3.296, "grad_norm": 0.6083097608917135, "learning_rate": 2.006515884347255e-05, "loss": 0.379, "loss_nan_ranks": 0, "loss_rank_avg": 0.35356348752975464, "step": 2060, "valid_targets_mean": 2350.0, "valid_targets_min": 931 }, { "epoch": 3.304, "grad_norm": 0.6638832788004323, "learning_rate": 1.9972074741041712e-05, "loss": 0.3858, "loss_nan_ranks": 0, "loss_rank_avg": 0.36626502871513367, "step": 2065, "valid_targets_mean": 2050.3, "valid_targets_min": 538 }, { "epoch": 3.312, "grad_norm": 0.6787805881687811, "learning_rate": 1.9878991243517913e-05, "loss": 0.3932, "loss_nan_ranks": 0, "loss_rank_avg": 0.41017746925354004, "step": 2070, "valid_targets_mean": 2297.9, "valid_targets_min": 776 }, { "epoch": 3.32, "grad_norm": 0.6931763136350336, "learning_rate": 1.9785910367242712e-05, "loss": 0.3945, "loss_nan_ranks": 0, "loss_rank_avg": 0.4205699563026428, "step": 2075, "valid_targets_mean": 2180.5, "valid_targets_min": 580 }, { "epoch": 3.328, "grad_norm": 0.6574199956221226, "learning_rate": 1.969283412850094e-05, "loss": 0.3751, "loss_nan_ranks": 0, "loss_rank_avg": 0.41730862855911255, "step": 2080, "valid_targets_mean": 2249.2, "valid_targets_min": 887 }, { "epoch": 3.336, "grad_norm": 0.5603282902950534, "learning_rate": 1.959976454347696e-05, "loss": 0.3569, "loss_nan_ranks": 0, "loss_rank_avg": 0.32021045684814453, "step": 2085, "valid_targets_mean": 2726.4, "valid_targets_min": 874 }, { "epoch": 3.344, "grad_norm": 0.6385163168979912, "learning_rate": 1.950670362821098e-05, "loss": 0.3619, "loss_nan_ranks": 0, "loss_rank_avg": 0.3663329482078552, "step": 2090, "valid_targets_mean": 2306.5, "valid_targets_min": 883 }, { "epoch": 3.352, "grad_norm": 0.5837917390861946, "learning_rate": 1.9413653398555437e-05, "loss": 0.3772, "loss_nan_ranks": 0, "loss_rank_avg": 0.3824262320995331, "step": 2095, "valid_targets_mean": 2530.4, "valid_targets_min": 916 }, { "epoch": 3.36, "grad_norm": 0.6366463867100456, "learning_rate": 1.9320615870131282e-05, "loss": 0.3701, "loss_nan_ranks": 0, "loss_rank_avg": 0.3722764849662781, "step": 2100, "valid_targets_mean": 2311.1, "valid_targets_min": 615 }, { "epoch": 3.368, "grad_norm": 0.618642847487934, "learning_rate": 1.9227593058284343e-05, "loss": 0.3752, "loss_nan_ranks": 0, "loss_rank_avg": 0.3829469084739685, "step": 2105, "valid_targets_mean": 2443.9, "valid_targets_min": 747 }, { "epoch": 3.376, "grad_norm": 0.6056798347672363, "learning_rate": 1.9134586978041663e-05, "loss": 0.3838, "loss_nan_ranks": 0, "loss_rank_avg": 0.3649381995201111, "step": 2110, "valid_targets_mean": 2630.2, "valid_targets_min": 1193 }, { "epoch": 3.384, "grad_norm": 0.6808595547961468, "learning_rate": 1.9041599644067846e-05, "loss": 0.3869, "loss_nan_ranks": 0, "loss_rank_avg": 0.40938037633895874, "step": 2115, "valid_targets_mean": 1987.3, "valid_targets_min": 897 }, { "epoch": 3.392, "grad_norm": 0.5591783619891524, "learning_rate": 1.8948633070621433e-05, "loss": 0.3828, "loss_nan_ranks": 0, "loss_rank_avg": 0.3668754994869232, "step": 2120, "valid_targets_mean": 2930.8, "valid_targets_min": 1054 }, { "epoch": 3.4, "grad_norm": 0.5708695292911752, "learning_rate": 1.885568927151124e-05, "loss": 0.3869, "loss_nan_ranks": 0, "loss_rank_avg": 0.36800146102905273, "step": 2125, "valid_targets_mean": 2738.0, "valid_targets_min": 1031 }, { "epoch": 3.408, "grad_norm": 0.6410323180590372, "learning_rate": 1.8762770260052773e-05, "loss": 0.3758, "loss_nan_ranks": 0, "loss_rank_avg": 0.39123398065567017, "step": 2130, "valid_targets_mean": 2428.8, "valid_targets_min": 370 }, { "epoch": 3.416, "grad_norm": 0.5958482716495009, "learning_rate": 1.8669878049024575e-05, "loss": 0.3926, "loss_nan_ranks": 0, "loss_rank_avg": 0.37300702929496765, "step": 2135, "valid_targets_mean": 2350.7, "valid_targets_min": 576 }, { "epoch": 3.424, "grad_norm": 0.6622488113759839, "learning_rate": 1.857701465062467e-05, "loss": 0.4018, "loss_nan_ranks": 0, "loss_rank_avg": 0.4317435622215271, "step": 2140, "valid_targets_mean": 2397.9, "valid_targets_min": 1223 }, { "epoch": 3.432, "grad_norm": 0.5977111746898944, "learning_rate": 1.848418207642693e-05, "loss": 0.3771, "loss_nan_ranks": 0, "loss_rank_avg": 0.39127588272094727, "step": 2145, "valid_targets_mean": 2532.9, "valid_targets_min": 959 }, { "epoch": 3.44, "grad_norm": 0.6975469176420807, "learning_rate": 1.8391382337337548e-05, "loss": 0.3999, "loss_nan_ranks": 0, "loss_rank_avg": 0.419955313205719, "step": 2150, "valid_targets_mean": 2115.2, "valid_targets_min": 786 }, { "epoch": 3.448, "grad_norm": 0.6151932097541486, "learning_rate": 1.829861744355144e-05, "loss": 0.3557, "loss_nan_ranks": 0, "loss_rank_avg": 0.3637232780456543, "step": 2155, "valid_targets_mean": 2578.8, "valid_targets_min": 1331 }, { "epoch": 3.456, "grad_norm": 0.6255190526192899, "learning_rate": 1.820588940450872e-05, "loss": 0.3856, "loss_nan_ranks": 0, "loss_rank_avg": 0.3912215828895569, "step": 2160, "valid_targets_mean": 2437.8, "valid_targets_min": 939 }, { "epoch": 3.464, "grad_norm": 0.629666509474719, "learning_rate": 1.8113200228851163e-05, "loss": 0.3914, "loss_nan_ranks": 0, "loss_rank_avg": 0.39584749937057495, "step": 2165, "valid_targets_mean": 2337.1, "valid_targets_min": 792 }, { "epoch": 3.472, "grad_norm": 0.6216477913146147, "learning_rate": 1.80205519243787e-05, "loss": 0.3657, "loss_nan_ranks": 0, "loss_rank_avg": 0.3581187427043915, "step": 2170, "valid_targets_mean": 2355.4, "valid_targets_min": 818 }, { "epoch": 3.48, "grad_norm": 0.6310163599034911, "learning_rate": 1.7927946498005934e-05, "loss": 0.3979, "loss_nan_ranks": 0, "loss_rank_avg": 0.3783031702041626, "step": 2175, "valid_targets_mean": 2337.3, "valid_targets_min": 950 }, { "epoch": 3.488, "grad_norm": 0.6238172083353898, "learning_rate": 1.7835385955718653e-05, "loss": 0.3915, "loss_nan_ranks": 0, "loss_rank_avg": 0.38771480321884155, "step": 2180, "valid_targets_mean": 2363.9, "valid_targets_min": 815 }, { "epoch": 3.496, "grad_norm": 0.6031878486237773, "learning_rate": 1.7742872302530366e-05, "loss": 0.3845, "loss_nan_ranks": 0, "loss_rank_avg": 0.3815397620201111, "step": 2185, "valid_targets_mean": 2484.9, "valid_targets_min": 1550 }, { "epoch": 3.504, "grad_norm": 0.6494767639468971, "learning_rate": 1.765040754243892e-05, "loss": 0.3865, "loss_nan_ranks": 0, "loss_rank_avg": 0.3746333122253418, "step": 2190, "valid_targets_mean": 2236.9, "valid_targets_min": 868 }, { "epoch": 3.512, "grad_norm": 0.6817120325400415, "learning_rate": 1.755799367838302e-05, "loss": 0.3788, "loss_nan_ranks": 0, "loss_rank_avg": 0.39564692974090576, "step": 2195, "valid_targets_mean": 2186.7, "valid_targets_min": 548 }, { "epoch": 3.52, "grad_norm": 0.6338802082931607, "learning_rate": 1.746563271219891e-05, "loss": 0.392, "loss_nan_ranks": 0, "loss_rank_avg": 0.4126970171928406, "step": 2200, "valid_targets_mean": 2585.1, "valid_targets_min": 707 }, { "epoch": 3.528, "grad_norm": 0.6416592297721261, "learning_rate": 1.7373326644576965e-05, "loss": 0.3872, "loss_nan_ranks": 0, "loss_rank_avg": 0.34282779693603516, "step": 2205, "valid_targets_mean": 2174.8, "valid_targets_min": 644 }, { "epoch": 3.536, "grad_norm": 0.6080200335692698, "learning_rate": 1.728107747501836e-05, "loss": 0.383, "loss_nan_ranks": 0, "loss_rank_avg": 0.37131014466285706, "step": 2210, "valid_targets_mean": 2337.9, "valid_targets_min": 1197 }, { "epoch": 3.544, "grad_norm": 0.5875911893293823, "learning_rate": 1.7188887201791785e-05, "loss": 0.3904, "loss_nan_ranks": 0, "loss_rank_avg": 0.38630616664886475, "step": 2215, "valid_targets_mean": 2732.9, "valid_targets_min": 626 }, { "epoch": 3.552, "grad_norm": 0.6679176162146531, "learning_rate": 1.7096757821890117e-05, "loss": 0.3899, "loss_nan_ranks": 0, "loss_rank_avg": 0.39532271027565, "step": 2220, "valid_targets_mean": 2232.2, "valid_targets_min": 1057 }, { "epoch": 3.56, "grad_norm": 0.6082886705221039, "learning_rate": 1.7004691330987196e-05, "loss": 0.4012, "loss_nan_ranks": 0, "loss_rank_avg": 0.38921183347702026, "step": 2225, "valid_targets_mean": 2391.8, "valid_targets_min": 506 }, { "epoch": 3.568, "grad_norm": 0.6520919498892896, "learning_rate": 1.691268972339458e-05, "loss": 0.3753, "loss_nan_ranks": 0, "loss_rank_avg": 0.40031343698501587, "step": 2230, "valid_targets_mean": 2362.3, "valid_targets_min": 589 }, { "epoch": 3.576, "grad_norm": 0.6381008434723833, "learning_rate": 1.6820754992018344e-05, "loss": 0.3912, "loss_nan_ranks": 0, "loss_rank_avg": 0.3827167749404907, "step": 2235, "valid_targets_mean": 2252.8, "valid_targets_min": 565 }, { "epoch": 3.584, "grad_norm": 0.6401934747434895, "learning_rate": 1.6728889128315932e-05, "loss": 0.3812, "loss_nan_ranks": 0, "loss_rank_avg": 0.37738150358200073, "step": 2240, "valid_targets_mean": 2236.8, "valid_targets_min": 545 }, { "epoch": 3.592, "grad_norm": 0.5616110823405466, "learning_rate": 1.663709412225297e-05, "loss": 0.3854, "loss_nan_ranks": 0, "loss_rank_avg": 0.3371823728084564, "step": 2245, "valid_targets_mean": 2436.0, "valid_targets_min": 537 }, { "epoch": 3.6, "grad_norm": 0.6235596557291808, "learning_rate": 1.654537196226022e-05, "loss": 0.3906, "loss_nan_ranks": 0, "loss_rank_avg": 0.3934670388698578, "step": 2250, "valid_targets_mean": 2573.6, "valid_targets_min": 585 }, { "epoch": 3.608, "grad_norm": 0.635368993888004, "learning_rate": 1.6453724635190455e-05, "loss": 0.382, "loss_nan_ranks": 0, "loss_rank_avg": 0.3701242208480835, "step": 2255, "valid_targets_mean": 2438.3, "valid_targets_min": 619 }, { "epoch": 3.616, "grad_norm": 0.6118501399881906, "learning_rate": 1.6362154126275467e-05, "loss": 0.3831, "loss_nan_ranks": 0, "loss_rank_avg": 0.3612810969352722, "step": 2260, "valid_targets_mean": 2340.6, "valid_targets_min": 848 }, { "epoch": 3.624, "grad_norm": 0.6125092689411699, "learning_rate": 1.6270662419083018e-05, "loss": 0.3812, "loss_nan_ranks": 0, "loss_rank_avg": 0.3900102972984314, "step": 2265, "valid_targets_mean": 2715.5, "valid_targets_min": 668 }, { "epoch": 3.632, "grad_norm": 0.5991814329834635, "learning_rate": 1.617925149547391e-05, "loss": 0.3798, "loss_nan_ranks": 0, "loss_rank_avg": 0.3785431683063507, "step": 2270, "valid_targets_mean": 2787.3, "valid_targets_min": 674 }, { "epoch": 3.64, "grad_norm": 0.649363117474134, "learning_rate": 1.608792333555904e-05, "loss": 0.3819, "loss_nan_ranks": 0, "loss_rank_avg": 0.39676743745803833, "step": 2275, "valid_targets_mean": 2309.2, "valid_targets_min": 973 }, { "epoch": 3.648, "grad_norm": 0.7434860302130719, "learning_rate": 1.5996679917656492e-05, "loss": 0.3788, "loss_nan_ranks": 0, "loss_rank_avg": 0.35800135135650635, "step": 2280, "valid_targets_mean": 2369.3, "valid_targets_min": 822 }, { "epoch": 3.656, "grad_norm": 0.6824484447511117, "learning_rate": 1.5905523218248723e-05, "loss": 0.3958, "loss_nan_ranks": 0, "loss_rank_avg": 0.4209508001804352, "step": 2285, "valid_targets_mean": 2079.3, "valid_targets_min": 605 }, { "epoch": 3.664, "grad_norm": 0.6571213066551697, "learning_rate": 1.5814455211939698e-05, "loss": 0.3844, "loss_nan_ranks": 0, "loss_rank_avg": 0.38445529341697693, "step": 2290, "valid_targets_mean": 2158.9, "valid_targets_min": 645 }, { "epoch": 3.672, "grad_norm": 0.6698625366113996, "learning_rate": 1.5723477871412168e-05, "loss": 0.382, "loss_nan_ranks": 0, "loss_rank_avg": 0.37412458658218384, "step": 2295, "valid_targets_mean": 2017.5, "valid_targets_min": 645 }, { "epoch": 3.68, "grad_norm": 0.5843315979866865, "learning_rate": 1.56325931673849e-05, "loss": 0.3732, "loss_nan_ranks": 0, "loss_rank_avg": 0.3598048985004425, "step": 2300, "valid_targets_mean": 2778.2, "valid_targets_min": 669 }, { "epoch": 3.6879999999999997, "grad_norm": 0.6520409980765114, "learning_rate": 1.5541803068569993e-05, "loss": 0.3908, "loss_nan_ranks": 0, "loss_rank_avg": 0.40478581190109253, "step": 2305, "valid_targets_mean": 2315.8, "valid_targets_min": 779 }, { "epoch": 3.6959999999999997, "grad_norm": 0.7037994821981977, "learning_rate": 1.5451109541630275e-05, "loss": 0.3886, "loss_nan_ranks": 0, "loss_rank_avg": 0.4001522660255432, "step": 2310, "valid_targets_mean": 2318.7, "valid_targets_min": 1207 }, { "epoch": 3.7039999999999997, "grad_norm": 0.6534218246018931, "learning_rate": 1.536051455113663e-05, "loss": 0.3845, "loss_nan_ranks": 0, "loss_rank_avg": 0.39225444197654724, "step": 2315, "valid_targets_mean": 2325.8, "valid_targets_min": 679 }, { "epoch": 3.7119999999999997, "grad_norm": 0.7316026004311299, "learning_rate": 1.527002005952551e-05, "loss": 0.3759, "loss_nan_ranks": 0, "loss_rank_avg": 0.41108351945877075, "step": 2320, "valid_targets_mean": 1964.9, "valid_targets_min": 869 }, { "epoch": 3.7199999999999998, "grad_norm": 0.6992511851816486, "learning_rate": 1.5179628027056373e-05, "loss": 0.3918, "loss_nan_ranks": 0, "loss_rank_avg": 0.3997647762298584, "step": 2325, "valid_targets_mean": 2162.9, "valid_targets_min": 626 }, { "epoch": 3.7279999999999998, "grad_norm": 0.7600622230811375, "learning_rate": 1.5089340411769257e-05, "loss": 0.3879, "loss_nan_ranks": 0, "loss_rank_avg": 0.39846283197402954, "step": 2330, "valid_targets_mean": 1911.8, "valid_targets_min": 601 }, { "epoch": 3.7359999999999998, "grad_norm": 0.6310808640241407, "learning_rate": 1.499915916944236e-05, "loss": 0.3901, "loss_nan_ranks": 0, "loss_rank_avg": 0.3807406425476074, "step": 2335, "valid_targets_mean": 2493.9, "valid_targets_min": 731 }, { "epoch": 3.7439999999999998, "grad_norm": 0.6385737145727371, "learning_rate": 1.490908625354964e-05, "loss": 0.3892, "loss_nan_ranks": 0, "loss_rank_avg": 0.3948187232017517, "step": 2340, "valid_targets_mean": 2656.6, "valid_targets_min": 642 }, { "epoch": 3.752, "grad_norm": 0.6214858448550059, "learning_rate": 1.4819123615218556e-05, "loss": 0.3878, "loss_nan_ranks": 0, "loss_rank_avg": 0.39337706565856934, "step": 2345, "valid_targets_mean": 2589.9, "valid_targets_min": 581 }, { "epoch": 3.76, "grad_norm": 0.6047668156096683, "learning_rate": 1.472927320318775e-05, "loss": 0.3588, "loss_nan_ranks": 0, "loss_rank_avg": 0.3817969262599945, "step": 2350, "valid_targets_mean": 2547.9, "valid_targets_min": 387 }, { "epoch": 3.768, "grad_norm": 0.6856277125915268, "learning_rate": 1.4639536963764878e-05, "loss": 0.3828, "loss_nan_ranks": 0, "loss_rank_avg": 0.4414476454257965, "step": 2355, "valid_targets_mean": 2318.7, "valid_targets_min": 627 }, { "epoch": 3.776, "grad_norm": 0.6333198424314916, "learning_rate": 1.4549916840784409e-05, "loss": 0.3879, "loss_nan_ranks": 0, "loss_rank_avg": 0.39172789454460144, "step": 2360, "valid_targets_mean": 2391.9, "valid_targets_min": 693 }, { "epoch": 3.784, "grad_norm": 0.6259500970317692, "learning_rate": 1.4460414775565555e-05, "loss": 0.368, "loss_nan_ranks": 0, "loss_rank_avg": 0.34704911708831787, "step": 2365, "valid_targets_mean": 2448.9, "valid_targets_min": 891 }, { "epoch": 3.792, "grad_norm": 0.7594578101901227, "learning_rate": 1.43710327068702e-05, "loss": 0.3796, "loss_nan_ranks": 0, "loss_rank_avg": 0.45061349868774414, "step": 2370, "valid_targets_mean": 1818.4, "valid_targets_min": 685 }, { "epoch": 3.8, "grad_norm": 0.6490031536890803, "learning_rate": 1.4281772570860897e-05, "loss": 0.3716, "loss_nan_ranks": 0, "loss_rank_avg": 0.3952520787715912, "step": 2375, "valid_targets_mean": 2224.6, "valid_targets_min": 740 }, { "epoch": 3.808, "grad_norm": 0.6955497814955105, "learning_rate": 1.4192636301058952e-05, "loss": 0.38, "loss_nan_ranks": 0, "loss_rank_avg": 0.41568028926849365, "step": 2380, "valid_targets_mean": 2112.8, "valid_targets_min": 968 }, { "epoch": 3.816, "grad_norm": 0.5275242338157292, "learning_rate": 1.4103625828302508e-05, "loss": 0.3841, "loss_nan_ranks": 0, "loss_rank_avg": 0.34493115544319153, "step": 2385, "valid_targets_mean": 2959.6, "valid_targets_min": 1957 }, { "epoch": 3.824, "grad_norm": 0.6859861254603057, "learning_rate": 1.4014743080704743e-05, "loss": 0.3844, "loss_nan_ranks": 0, "loss_rank_avg": 0.43106839060783386, "step": 2390, "valid_targets_mean": 2140.1, "valid_targets_min": 507 }, { "epoch": 3.832, "grad_norm": 0.6306309015014626, "learning_rate": 1.3925989983612118e-05, "loss": 0.3795, "loss_nan_ranks": 0, "loss_rank_avg": 0.3861228823661804, "step": 2395, "valid_targets_mean": 2378.4, "valid_targets_min": 1191 }, { "epoch": 3.84, "grad_norm": 0.6393832606090019, "learning_rate": 1.383736845956261e-05, "loss": 0.3611, "loss_nan_ranks": 0, "loss_rank_avg": 0.34870201349258423, "step": 2400, "valid_targets_mean": 2230.6, "valid_targets_min": 608 }, { "epoch": 3.848, "grad_norm": 0.6433318760017774, "learning_rate": 1.3748880428244154e-05, "loss": 0.3895, "loss_nan_ranks": 0, "loss_rank_avg": 0.3842615783214569, "step": 2405, "valid_targets_mean": 2342.8, "valid_targets_min": 683 }, { "epoch": 3.856, "grad_norm": 0.6385186475370088, "learning_rate": 1.3660527806452965e-05, "loss": 0.3729, "loss_nan_ranks": 0, "loss_rank_avg": 0.3636973798274994, "step": 2410, "valid_targets_mean": 2115.1, "valid_targets_min": 629 }, { "epoch": 3.864, "grad_norm": 0.6777060990479569, "learning_rate": 1.3572312508052118e-05, "loss": 0.4042, "loss_nan_ranks": 0, "loss_rank_avg": 0.4158111810684204, "step": 2415, "valid_targets_mean": 2290.8, "valid_targets_min": 484 }, { "epoch": 3.872, "grad_norm": 0.6234999879880668, "learning_rate": 1.3484236443929982e-05, "loss": 0.366, "loss_nan_ranks": 0, "loss_rank_avg": 0.37164247035980225, "step": 2420, "valid_targets_mean": 2397.4, "valid_targets_min": 677 }, { "epoch": 3.88, "grad_norm": 0.6500653023091283, "learning_rate": 1.3396301521958926e-05, "loss": 0.3986, "loss_nan_ranks": 0, "loss_rank_avg": 0.406566321849823, "step": 2425, "valid_targets_mean": 2353.1, "valid_targets_min": 724 }, { "epoch": 3.888, "grad_norm": 0.6199348747856589, "learning_rate": 1.3308509646953934e-05, "loss": 0.3767, "loss_nan_ranks": 0, "loss_rank_avg": 0.35002443194389343, "step": 2430, "valid_targets_mean": 2550.4, "valid_targets_min": 626 }, { "epoch": 3.896, "grad_norm": 0.6118256913662354, "learning_rate": 1.3220862720631349e-05, "loss": 0.3801, "loss_nan_ranks": 0, "loss_rank_avg": 0.37597745656967163, "step": 2435, "valid_targets_mean": 2473.8, "valid_targets_min": 800 }, { "epoch": 3.904, "grad_norm": 0.6165146793403535, "learning_rate": 1.3133362641567697e-05, "loss": 0.3904, "loss_nan_ranks": 0, "loss_rank_avg": 0.3653344511985779, "step": 2440, "valid_targets_mean": 2391.1, "valid_targets_min": 397 }, { "epoch": 3.912, "grad_norm": 0.5984853631086248, "learning_rate": 1.3046011305158546e-05, "loss": 0.3813, "loss_nan_ranks": 0, "loss_rank_avg": 0.3555792570114136, "step": 2445, "valid_targets_mean": 2439.0, "valid_targets_min": 788 }, { "epoch": 3.92, "grad_norm": 0.6517963353353854, "learning_rate": 1.2958810603577456e-05, "loss": 0.3871, "loss_nan_ranks": 0, "loss_rank_avg": 0.38606372475624084, "step": 2450, "valid_targets_mean": 2505.0, "valid_targets_min": 654 }, { "epoch": 3.928, "grad_norm": 0.6204479452047876, "learning_rate": 1.2871762425734989e-05, "loss": 0.38, "loss_nan_ranks": 0, "loss_rank_avg": 0.3570050299167633, "step": 2455, "valid_targets_mean": 2287.1, "valid_targets_min": 484 }, { "epoch": 3.936, "grad_norm": 0.6468990589515311, "learning_rate": 1.278486865723779e-05, "loss": 0.3826, "loss_nan_ranks": 0, "loss_rank_avg": 0.388399600982666, "step": 2460, "valid_targets_mean": 2331.8, "valid_targets_min": 699 }, { "epoch": 3.944, "grad_norm": 0.8669461873958879, "learning_rate": 1.269813118034775e-05, "loss": 0.3805, "loss_nan_ranks": 0, "loss_rank_avg": 0.39867132902145386, "step": 2465, "valid_targets_mean": 2261.4, "valid_targets_min": 689 }, { "epoch": 3.952, "grad_norm": 0.6915041689439219, "learning_rate": 1.2611551873941213e-05, "loss": 0.3949, "loss_nan_ranks": 0, "loss_rank_avg": 0.421209454536438, "step": 2470, "valid_targets_mean": 2146.9, "valid_targets_min": 538 }, { "epoch": 3.96, "grad_norm": 0.5609071746113083, "learning_rate": 1.2525132613468309e-05, "loss": 0.4007, "loss_nan_ranks": 0, "loss_rank_avg": 0.3427595794200897, "step": 2475, "valid_targets_mean": 2587.0, "valid_targets_min": 711 }, { "epoch": 3.968, "grad_norm": 0.7370047486079004, "learning_rate": 1.2438875270912294e-05, "loss": 0.3862, "loss_nan_ranks": 0, "loss_rank_avg": 0.4031817317008972, "step": 2480, "valid_targets_mean": 1953.1, "valid_targets_min": 615 }, { "epoch": 3.976, "grad_norm": 0.5534754324867939, "learning_rate": 1.2352781714749016e-05, "loss": 0.3748, "loss_nan_ranks": 0, "loss_rank_avg": 0.35224318504333496, "step": 2485, "valid_targets_mean": 2763.1, "valid_targets_min": 846 }, { "epoch": 3.984, "grad_norm": 0.6023895294851896, "learning_rate": 1.2266853809906469e-05, "loss": 0.3817, "loss_nan_ranks": 0, "loss_rank_avg": 0.35587412118911743, "step": 2490, "valid_targets_mean": 2325.6, "valid_targets_min": 573 }, { "epoch": 3.992, "grad_norm": 0.5933142283195199, "learning_rate": 1.2181093417724317e-05, "loss": 0.375, "loss_nan_ranks": 0, "loss_rank_avg": 0.36003753542900085, "step": 2495, "valid_targets_mean": 2494.0, "valid_targets_min": 949 }, { "epoch": 4.0, "grad_norm": 0.5988758088534348, "learning_rate": 1.2095502395913676e-05, "loss": 0.3703, "loss_nan_ranks": 0, "loss_rank_avg": 0.36336004734039307, "step": 2500, "valid_targets_mean": 2681.9, "valid_targets_min": 1168 }, { "epoch": 4.008, "grad_norm": 0.659987854618312, "learning_rate": 1.2010082598516775e-05, "loss": 0.3587, "loss_nan_ranks": 0, "loss_rank_avg": 0.3814525604248047, "step": 2505, "valid_targets_mean": 2159.9, "valid_targets_min": 686 }, { "epoch": 4.016, "grad_norm": 0.6373533465376138, "learning_rate": 1.1924835875866884e-05, "loss": 0.3691, "loss_nan_ranks": 0, "loss_rank_avg": 0.3539432883262634, "step": 2510, "valid_targets_mean": 2507.6, "valid_targets_min": 585 }, { "epoch": 4.024, "grad_norm": 0.6946756197147173, "learning_rate": 1.1839764074548145e-05, "loss": 0.3564, "loss_nan_ranks": 0, "loss_rank_avg": 0.35100963711738586, "step": 2515, "valid_targets_mean": 2131.2, "valid_targets_min": 560 }, { "epoch": 4.032, "grad_norm": 0.7462607180687901, "learning_rate": 1.1754869037355659e-05, "loss": 0.3708, "loss_nan_ranks": 0, "loss_rank_avg": 0.3982384502887726, "step": 2520, "valid_targets_mean": 1920.0, "valid_targets_min": 481 }, { "epoch": 4.04, "grad_norm": 0.7206423837270861, "learning_rate": 1.1670152603255504e-05, "loss": 0.3772, "loss_nan_ranks": 0, "loss_rank_avg": 0.4138847887516022, "step": 2525, "valid_targets_mean": 2038.5, "valid_targets_min": 718 }, { "epoch": 4.048, "grad_norm": 0.6951181641143632, "learning_rate": 1.1585616607344909e-05, "loss": 0.3671, "loss_nan_ranks": 0, "loss_rank_avg": 0.3845224380493164, "step": 2530, "valid_targets_mean": 2092.4, "valid_targets_min": 724 }, { "epoch": 4.056, "grad_norm": 0.679395145581085, "learning_rate": 1.1501262880812547e-05, "loss": 0.3619, "loss_nan_ranks": 0, "loss_rank_avg": 0.39052027463912964, "step": 2535, "valid_targets_mean": 2193.2, "valid_targets_min": 805 }, { "epoch": 4.064, "grad_norm": 0.5721838907114962, "learning_rate": 1.141709325089881e-05, "loss": 0.3583, "loss_nan_ranks": 0, "loss_rank_avg": 0.32457679510116577, "step": 2540, "valid_targets_mean": 2973.6, "valid_targets_min": 1569 }, { "epoch": 4.072, "grad_norm": 0.6338114426507533, "learning_rate": 1.1333109540856257e-05, "loss": 0.3688, "loss_nan_ranks": 0, "loss_rank_avg": 0.35262852907180786, "step": 2545, "valid_targets_mean": 2444.9, "valid_targets_min": 824 }, { "epoch": 4.08, "grad_norm": 0.6708275619659173, "learning_rate": 1.1249313569910143e-05, "loss": 0.3703, "loss_nan_ranks": 0, "loss_rank_avg": 0.3734751343727112, "step": 2550, "valid_targets_mean": 2353.8, "valid_targets_min": 716 }, { "epoch": 4.088, "grad_norm": 0.6378454319068008, "learning_rate": 1.1165707153218942e-05, "loss": 0.3572, "loss_nan_ranks": 0, "loss_rank_avg": 0.35157474875450134, "step": 2555, "valid_targets_mean": 2397.9, "valid_targets_min": 580 }, { "epoch": 4.096, "grad_norm": 0.6640784937904208, "learning_rate": 1.1082292101835121e-05, "loss": 0.3753, "loss_nan_ranks": 0, "loss_rank_avg": 0.3626524806022644, "step": 2560, "valid_targets_mean": 2538.4, "valid_targets_min": 877 }, { "epoch": 4.104, "grad_norm": 0.6217842243587747, "learning_rate": 1.099907022266582e-05, "loss": 0.3755, "loss_nan_ranks": 0, "loss_rank_avg": 0.36767810583114624, "step": 2565, "valid_targets_mean": 2459.2, "valid_targets_min": 743 }, { "epoch": 4.112, "grad_norm": 0.6808459480743844, "learning_rate": 1.0916043318433767e-05, "loss": 0.3597, "loss_nan_ranks": 0, "loss_rank_avg": 0.35825079679489136, "step": 2570, "valid_targets_mean": 2529.9, "valid_targets_min": 834 }, { "epoch": 4.12, "grad_norm": 0.6106311108516793, "learning_rate": 1.0833213187638203e-05, "loss": 0.3769, "loss_nan_ranks": 0, "loss_rank_avg": 0.3592633008956909, "step": 2575, "valid_targets_mean": 2422.1, "valid_targets_min": 1018 }, { "epoch": 4.128, "grad_norm": 0.6479267624633701, "learning_rate": 1.0750581624515957e-05, "loss": 0.3654, "loss_nan_ranks": 0, "loss_rank_avg": 0.37433257699012756, "step": 2580, "valid_targets_mean": 2348.7, "valid_targets_min": 814 }, { "epoch": 4.136, "grad_norm": 0.6691291039177731, "learning_rate": 1.0668150419002527e-05, "loss": 0.3672, "loss_nan_ranks": 0, "loss_rank_avg": 0.36849620938301086, "step": 2585, "valid_targets_mean": 2256.8, "valid_targets_min": 567 }, { "epoch": 4.144, "grad_norm": 0.6048408706080803, "learning_rate": 1.0585921356693349e-05, "loss": 0.3519, "loss_nan_ranks": 0, "loss_rank_avg": 0.33743125200271606, "step": 2590, "valid_targets_mean": 2755.6, "valid_targets_min": 992 }, { "epoch": 4.152, "grad_norm": 0.6278027947650242, "learning_rate": 1.0503896218805112e-05, "loss": 0.3547, "loss_nan_ranks": 0, "loss_rank_avg": 0.32869869470596313, "step": 2595, "valid_targets_mean": 2364.7, "valid_targets_min": 475 }, { "epoch": 4.16, "grad_norm": 0.6472405304733959, "learning_rate": 1.0422076782137155e-05, "loss": 0.3565, "loss_nan_ranks": 0, "loss_rank_avg": 0.34370821714401245, "step": 2600, "valid_targets_mean": 2329.3, "valid_targets_min": 650 }, { "epoch": 4.168, "grad_norm": 0.7061910322339049, "learning_rate": 1.0340464819032991e-05, "loss": 0.3632, "loss_nan_ranks": 0, "loss_rank_avg": 0.3962213099002838, "step": 2605, "valid_targets_mean": 2245.4, "valid_targets_min": 900 }, { "epoch": 4.176, "grad_norm": 0.6378995168442645, "learning_rate": 1.0259062097341911e-05, "loss": 0.3638, "loss_nan_ranks": 0, "loss_rank_avg": 0.3800092339515686, "step": 2610, "valid_targets_mean": 2487.3, "valid_targets_min": 1426 }, { "epoch": 4.184, "grad_norm": 0.7059748137437399, "learning_rate": 1.017787038038071e-05, "loss": 0.3631, "loss_nan_ranks": 0, "loss_rank_avg": 0.35289984941482544, "step": 2615, "valid_targets_mean": 1940.3, "valid_targets_min": 862 }, { "epoch": 4.192, "grad_norm": 0.6696113201692014, "learning_rate": 1.0096891426895476e-05, "loss": 0.3729, "loss_nan_ranks": 0, "loss_rank_avg": 0.3574759364128113, "step": 2620, "valid_targets_mean": 2335.2, "valid_targets_min": 672 }, { "epoch": 4.2, "grad_norm": 0.6869753483812769, "learning_rate": 1.0016126991023447e-05, "loss": 0.3671, "loss_nan_ranks": 0, "loss_rank_avg": 0.36144423484802246, "step": 2625, "valid_targets_mean": 2039.0, "valid_targets_min": 684 }, { "epoch": 4.208, "grad_norm": 0.6559234434297613, "learning_rate": 9.935578822255113e-06, "loss": 0.3639, "loss_nan_ranks": 0, "loss_rank_avg": 0.361632764339447, "step": 2630, "valid_targets_mean": 2439.4, "valid_targets_min": 948 }, { "epoch": 4.216, "grad_norm": 0.6599510201021159, "learning_rate": 9.855248665396218e-06, "loss": 0.3844, "loss_nan_ranks": 0, "loss_rank_avg": 0.36938899755477905, "step": 2635, "valid_targets_mean": 2472.2, "valid_targets_min": 751 }, { "epoch": 4.224, "grad_norm": 0.7649708638971318, "learning_rate": 9.775138260530046e-06, "loss": 0.3652, "loss_nan_ranks": 0, "loss_rank_avg": 0.40082091093063354, "step": 2640, "valid_targets_mean": 1781.4, "valid_targets_min": 626 }, { "epoch": 4.232, "grad_norm": 0.6143245924456161, "learning_rate": 9.695249342979667e-06, "loss": 0.3429, "loss_nan_ranks": 0, "loss_rank_avg": 0.3415009379386902, "step": 2645, "valid_targets_mean": 2800.2, "valid_targets_min": 1405 }, { "epoch": 4.24, "grad_norm": 0.5846688132687459, "learning_rate": 9.615583643270371e-06, "loss": 0.3559, "loss_nan_ranks": 0, "loss_rank_avg": 0.32217174768447876, "step": 2650, "valid_targets_mean": 2645.8, "valid_targets_min": 948 }, { "epoch": 4.248, "grad_norm": 0.6603892872326103, "learning_rate": 9.536142887092208e-06, "loss": 0.3762, "loss_nan_ranks": 0, "loss_rank_avg": 0.367895245552063, "step": 2655, "valid_targets_mean": 2176.4, "valid_targets_min": 638 }, { "epoch": 4.256, "grad_norm": 0.6203923641190354, "learning_rate": 9.456928795262552e-06, "loss": 0.3632, "loss_nan_ranks": 0, "loss_rank_avg": 0.3404765725135803, "step": 2660, "valid_targets_mean": 2438.1, "valid_targets_min": 891 }, { "epoch": 4.264, "grad_norm": 0.5972320780019671, "learning_rate": 9.377943083688873e-06, "loss": 0.359, "loss_nan_ranks": 0, "loss_rank_avg": 0.35350432991981506, "step": 2665, "valid_targets_mean": 2765.6, "valid_targets_min": 1749 }, { "epoch": 4.272, "grad_norm": 0.6514979793709264, "learning_rate": 9.29918746333153e-06, "loss": 0.3623, "loss_nan_ranks": 0, "loss_rank_avg": 0.373460054397583, "step": 2670, "valid_targets_mean": 2471.8, "valid_targets_min": 1143 }, { "epoch": 4.28, "grad_norm": 0.5759073825154406, "learning_rate": 9.220663640166756e-06, "loss": 0.3321, "loss_nan_ranks": 0, "loss_rank_avg": 0.32004714012145996, "step": 2675, "valid_targets_mean": 2779.6, "valid_targets_min": 1575 }, { "epoch": 4.288, "grad_norm": 0.6467606600817037, "learning_rate": 9.142373315149655e-06, "loss": 0.3544, "loss_nan_ranks": 0, "loss_rank_avg": 0.34785300493240356, "step": 2680, "valid_targets_mean": 2261.9, "valid_targets_min": 735 }, { "epoch": 4.296, "grad_norm": 0.6203071379346831, "learning_rate": 9.064318184177373e-06, "loss": 0.3563, "loss_nan_ranks": 0, "loss_rank_avg": 0.34045273065567017, "step": 2685, "valid_targets_mean": 2397.2, "valid_targets_min": 714 }, { "epoch": 4.304, "grad_norm": 0.5913268860832173, "learning_rate": 8.986499938052396e-06, "loss": 0.3564, "loss_nan_ranks": 0, "loss_rank_avg": 0.33473074436187744, "step": 2690, "valid_targets_mean": 2817.1, "valid_targets_min": 816 }, { "epoch": 4.312, "grad_norm": 0.672376259320334, "learning_rate": 8.908920262445859e-06, "loss": 0.3752, "loss_nan_ranks": 0, "loss_rank_avg": 0.3796904683113098, "step": 2695, "valid_targets_mean": 2770.5, "valid_targets_min": 931 }, { "epoch": 4.32, "grad_norm": 0.676521253764822, "learning_rate": 8.831580837861082e-06, "loss": 0.3626, "loss_nan_ranks": 0, "loss_rank_avg": 0.3628620207309723, "step": 2700, "valid_targets_mean": 2069.5, "valid_targets_min": 825 }, { "epoch": 4.328, "grad_norm": 0.6437422459664721, "learning_rate": 8.754483339597166e-06, "loss": 0.3791, "loss_nan_ranks": 0, "loss_rank_avg": 0.3925166428089142, "step": 2705, "valid_targets_mean": 2486.3, "valid_targets_min": 850 }, { "epoch": 4.336, "grad_norm": 0.6908108744201664, "learning_rate": 8.677629437712665e-06, "loss": 0.3641, "loss_nan_ranks": 0, "loss_rank_avg": 0.3935179114341736, "step": 2710, "valid_targets_mean": 2178.8, "valid_targets_min": 576 }, { "epoch": 4.344, "grad_norm": 0.5759636363073697, "learning_rate": 8.601020796989467e-06, "loss": 0.3681, "loss_nan_ranks": 0, "loss_rank_avg": 0.3588644862174988, "step": 2715, "valid_targets_mean": 2973.3, "valid_targets_min": 1601 }, { "epoch": 4.352, "grad_norm": 0.7140271324248649, "learning_rate": 8.524659076896656e-06, "loss": 0.3752, "loss_nan_ranks": 0, "loss_rank_avg": 0.39689189195632935, "step": 2720, "valid_targets_mean": 2259.2, "valid_targets_min": 724 }, { "epoch": 4.36, "grad_norm": 0.6334622079660887, "learning_rate": 8.448545931554652e-06, "loss": 0.3649, "loss_nan_ranks": 0, "loss_rank_avg": 0.33897995948791504, "step": 2725, "valid_targets_mean": 2458.4, "valid_targets_min": 614 }, { "epoch": 4.368, "grad_norm": 0.771518737688984, "learning_rate": 8.372683009699307e-06, "loss": 0.3802, "loss_nan_ranks": 0, "loss_rank_avg": 0.40696775913238525, "step": 2730, "valid_targets_mean": 2110.1, "valid_targets_min": 842 }, { "epoch": 4.376, "grad_norm": 0.6565853512831313, "learning_rate": 8.297071954646248e-06, "loss": 0.3717, "loss_nan_ranks": 0, "loss_rank_avg": 0.34116989374160767, "step": 2735, "valid_targets_mean": 2366.1, "valid_targets_min": 894 }, { "epoch": 4.384, "grad_norm": 0.7160416008305666, "learning_rate": 8.22171440425523e-06, "loss": 0.3621, "loss_nan_ranks": 0, "loss_rank_avg": 0.3710840940475464, "step": 2740, "valid_targets_mean": 2159.7, "valid_targets_min": 576 }, { "epoch": 4.392, "grad_norm": 0.5817023955144996, "learning_rate": 8.146611990894683e-06, "loss": 0.3483, "loss_nan_ranks": 0, "loss_rank_avg": 0.32857975363731384, "step": 2745, "valid_targets_mean": 2690.8, "valid_targets_min": 1464 }, { "epoch": 4.4, "grad_norm": 0.7255898568089918, "learning_rate": 8.071766341406363e-06, "loss": 0.3671, "loss_nan_ranks": 0, "loss_rank_avg": 0.40352243185043335, "step": 2750, "valid_targets_mean": 2041.8, "valid_targets_min": 702 }, { "epoch": 4.408, "grad_norm": 0.5782232874230977, "learning_rate": 7.997179077070092e-06, "loss": 0.3474, "loss_nan_ranks": 0, "loss_rank_avg": 0.32014548778533936, "step": 2755, "valid_targets_mean": 2697.8, "valid_targets_min": 615 }, { "epoch": 4.416, "grad_norm": 0.6458589269530536, "learning_rate": 7.92285181356864e-06, "loss": 0.359, "loss_nan_ranks": 0, "loss_rank_avg": 0.37554532289505005, "step": 2760, "valid_targets_mean": 2480.9, "valid_targets_min": 951 }, { "epoch": 4.424, "grad_norm": 0.7021766881006695, "learning_rate": 7.848786160952726e-06, "loss": 0.3735, "loss_nan_ranks": 0, "loss_rank_avg": 0.38606834411621094, "step": 2765, "valid_targets_mean": 2181.6, "valid_targets_min": 957 }, { "epoch": 4.432, "grad_norm": 0.6274510241809619, "learning_rate": 7.77498372360617e-06, "loss": 0.372, "loss_nan_ranks": 0, "loss_rank_avg": 0.3752686083316803, "step": 2770, "valid_targets_mean": 2571.4, "valid_targets_min": 1247 }, { "epoch": 4.44, "grad_norm": 0.5787589565707946, "learning_rate": 7.701446100211095e-06, "loss": 0.3831, "loss_nan_ranks": 0, "loss_rank_avg": 0.33762532472610474, "step": 2775, "valid_targets_mean": 2774.3, "valid_targets_min": 870 }, { "epoch": 4.448, "grad_norm": 0.6479393506208262, "learning_rate": 7.628174883713322e-06, "loss": 0.3532, "loss_nan_ranks": 0, "loss_rank_avg": 0.3586277365684509, "step": 2780, "valid_targets_mean": 2451.2, "valid_targets_min": 1111 }, { "epoch": 4.456, "grad_norm": 0.7036621463124177, "learning_rate": 7.555171661287875e-06, "loss": 0.3826, "loss_nan_ranks": 0, "loss_rank_avg": 0.38187170028686523, "step": 2785, "valid_targets_mean": 2242.8, "valid_targets_min": 1165 }, { "epoch": 4.464, "grad_norm": 0.7404371740874729, "learning_rate": 7.482438014304567e-06, "loss": 0.3625, "loss_nan_ranks": 0, "loss_rank_avg": 0.3355005383491516, "step": 2790, "valid_targets_mean": 2598.3, "valid_targets_min": 496 }, { "epoch": 4.4719999999999995, "grad_norm": 0.5856491425898046, "learning_rate": 7.4099755182937685e-06, "loss": 0.3742, "loss_nan_ranks": 0, "loss_rank_avg": 0.35368943214416504, "step": 2795, "valid_targets_mean": 2671.3, "valid_targets_min": 716 }, { "epoch": 4.48, "grad_norm": 0.7412936316470807, "learning_rate": 7.337785742912289e-06, "loss": 0.3762, "loss_nan_ranks": 0, "loss_rank_avg": 0.38273218274116516, "step": 2800, "valid_targets_mean": 1994.7, "valid_targets_min": 714 }, { "epoch": 4.4879999999999995, "grad_norm": 0.6176390800611116, "learning_rate": 7.265870251909335e-06, "loss": 0.363, "loss_nan_ranks": 0, "loss_rank_avg": 0.33250662684440613, "step": 2805, "valid_targets_mean": 2466.8, "valid_targets_min": 548 }, { "epoch": 4.496, "grad_norm": 0.6081405178337042, "learning_rate": 7.194230603092697e-06, "loss": 0.3704, "loss_nan_ranks": 0, "loss_rank_avg": 0.33933448791503906, "step": 2810, "valid_targets_mean": 2663.7, "valid_targets_min": 942 }, { "epoch": 4.504, "grad_norm": 0.6614502083929048, "learning_rate": 7.122868348294927e-06, "loss": 0.3581, "loss_nan_ranks": 0, "loss_rank_avg": 0.34448274970054626, "step": 2815, "valid_targets_mean": 2118.9, "valid_targets_min": 396 }, { "epoch": 4.5120000000000005, "grad_norm": 0.6638253505646341, "learning_rate": 7.051785033339804e-06, "loss": 0.37, "loss_nan_ranks": 0, "loss_rank_avg": 0.37338095903396606, "step": 2820, "valid_targets_mean": 2290.4, "valid_targets_min": 484 }, { "epoch": 4.52, "grad_norm": 0.6637334464053732, "learning_rate": 6.980982198008785e-06, "loss": 0.3612, "loss_nan_ranks": 0, "loss_rank_avg": 0.3786088824272156, "step": 2825, "valid_targets_mean": 2202.2, "valid_targets_min": 1287 }, { "epoch": 4.5280000000000005, "grad_norm": 0.6350340466502454, "learning_rate": 6.910461376007704e-06, "loss": 0.3754, "loss_nan_ranks": 0, "loss_rank_avg": 0.3658716678619385, "step": 2830, "valid_targets_mean": 2444.1, "valid_targets_min": 794 }, { "epoch": 4.536, "grad_norm": 0.6901975392443126, "learning_rate": 6.840224094933501e-06, "loss": 0.3783, "loss_nan_ranks": 0, "loss_rank_avg": 0.39317822456359863, "step": 2835, "valid_targets_mean": 2204.8, "valid_targets_min": 1276 }, { "epoch": 4.5440000000000005, "grad_norm": 0.6516495367984168, "learning_rate": 6.7702718762411505e-06, "loss": 0.3593, "loss_nan_ranks": 0, "loss_rank_avg": 0.369767963886261, "step": 2840, "valid_targets_mean": 2537.8, "valid_targets_min": 1372 }, { "epoch": 4.552, "grad_norm": 0.6329226215152103, "learning_rate": 6.700606235210731e-06, "loss": 0.3512, "loss_nan_ranks": 0, "loss_rank_avg": 0.35097938776016235, "step": 2845, "valid_targets_mean": 2429.1, "valid_targets_min": 1097 }, { "epoch": 4.5600000000000005, "grad_norm": 0.7490965587225468, "learning_rate": 6.631228680914558e-06, "loss": 0.3881, "loss_nan_ranks": 0, "loss_rank_avg": 0.39271023869514465, "step": 2850, "valid_targets_mean": 2262.6, "valid_targets_min": 654 }, { "epoch": 4.568, "grad_norm": 0.6301883634712518, "learning_rate": 6.562140716184515e-06, "loss": 0.351, "loss_nan_ranks": 0, "loss_rank_avg": 0.3484507203102112, "step": 2855, "valid_targets_mean": 2629.8, "valid_targets_min": 945 }, { "epoch": 4.576, "grad_norm": 0.7290224675401246, "learning_rate": 6.493343837579511e-06, "loss": 0.3677, "loss_nan_ranks": 0, "loss_rank_avg": 0.38203197717666626, "step": 2860, "valid_targets_mean": 1948.4, "valid_targets_min": 663 }, { "epoch": 4.584, "grad_norm": 0.6646731504746918, "learning_rate": 6.424839535353045e-06, "loss": 0.3734, "loss_nan_ranks": 0, "loss_rank_avg": 0.3883858025074005, "step": 2865, "valid_targets_mean": 2372.2, "valid_targets_min": 729 }, { "epoch": 4.592, "grad_norm": 0.6719090701487639, "learning_rate": 6.356629293420926e-06, "loss": 0.3708, "loss_nan_ranks": 0, "loss_rank_avg": 0.40090903639793396, "step": 2870, "valid_targets_mean": 2423.6, "valid_targets_min": 681 }, { "epoch": 4.6, "grad_norm": 0.5921698249244614, "learning_rate": 6.28871458932913e-06, "loss": 0.3708, "loss_nan_ranks": 0, "loss_rank_avg": 0.3327995836734772, "step": 2875, "valid_targets_mean": 2652.3, "valid_targets_min": 683 }, { "epoch": 4.608, "grad_norm": 0.6236530614592126, "learning_rate": 6.2210968942218206e-06, "loss": 0.3673, "loss_nan_ranks": 0, "loss_rank_avg": 0.35564684867858887, "step": 2880, "valid_targets_mean": 2732.6, "valid_targets_min": 1033 }, { "epoch": 4.616, "grad_norm": 0.6938137658003077, "learning_rate": 6.153777672809438e-06, "loss": 0.3705, "loss_nan_ranks": 0, "loss_rank_avg": 0.3655781149864197, "step": 2885, "valid_targets_mean": 2043.9, "valid_targets_min": 1157 }, { "epoch": 4.624, "grad_norm": 0.6620370706764436, "learning_rate": 6.086758383336984e-06, "loss": 0.376, "loss_nan_ranks": 0, "loss_rank_avg": 0.38660040497779846, "step": 2890, "valid_targets_mean": 2444.5, "valid_targets_min": 710 }, { "epoch": 4.632, "grad_norm": 0.6810382275063193, "learning_rate": 6.0200404775524715e-06, "loss": 0.3696, "loss_nan_ranks": 0, "loss_rank_avg": 0.3601524829864502, "step": 2895, "valid_targets_mean": 2163.6, "valid_targets_min": 721 }, { "epoch": 4.64, "grad_norm": 0.5894159349358187, "learning_rate": 5.9536254006754155e-06, "loss": 0.3634, "loss_nan_ranks": 0, "loss_rank_avg": 0.3503599166870117, "step": 2900, "valid_targets_mean": 2725.6, "valid_targets_min": 417 }, { "epoch": 4.648, "grad_norm": 0.6496198594431579, "learning_rate": 5.887514591365593e-06, "loss": 0.3586, "loss_nan_ranks": 0, "loss_rank_avg": 0.3693704605102539, "step": 2905, "valid_targets_mean": 2608.0, "valid_targets_min": 1287 }, { "epoch": 4.656, "grad_norm": 0.6957437044968604, "learning_rate": 5.821709481691798e-06, "loss": 0.3611, "loss_nan_ranks": 0, "loss_rank_avg": 0.37342962622642517, "step": 2910, "valid_targets_mean": 2159.9, "valid_targets_min": 704 }, { "epoch": 4.664, "grad_norm": 0.6264433026530039, "learning_rate": 5.75621149710091e-06, "loss": 0.3665, "loss_nan_ranks": 0, "loss_rank_avg": 0.3909727931022644, "step": 2915, "valid_targets_mean": 2693.1, "valid_targets_min": 1187 }, { "epoch": 4.672, "grad_norm": 0.6244229147955063, "learning_rate": 5.691022056386961e-06, "loss": 0.363, "loss_nan_ranks": 0, "loss_rank_avg": 0.3780739903450012, "step": 2920, "valid_targets_mean": 2513.6, "valid_targets_min": 926 }, { "epoch": 4.68, "grad_norm": 0.664503211885057, "learning_rate": 5.6261425716604136e-06, "loss": 0.3697, "loss_nan_ranks": 0, "loss_rank_avg": 0.37827444076538086, "step": 2925, "valid_targets_mean": 2439.2, "valid_targets_min": 1019 }, { "epoch": 4.688, "grad_norm": 0.6098168284779346, "learning_rate": 5.56157444831757e-06, "loss": 0.3662, "loss_nan_ranks": 0, "loss_rank_avg": 0.3484095335006714, "step": 2930, "valid_targets_mean": 2730.8, "valid_targets_min": 1474 }, { "epoch": 4.696, "grad_norm": 0.6159163143967894, "learning_rate": 5.4973190850101334e-06, "loss": 0.3478, "loss_nan_ranks": 0, "loss_rank_avg": 0.33030563592910767, "step": 2935, "valid_targets_mean": 2694.9, "valid_targets_min": 692 }, { "epoch": 4.704, "grad_norm": 0.9275789347832416, "learning_rate": 5.433377873614925e-06, "loss": 0.372, "loss_nan_ranks": 0, "loss_rank_avg": 0.36244189739227295, "step": 2940, "valid_targets_mean": 2217.1, "valid_targets_min": 643 }, { "epoch": 4.712, "grad_norm": 0.6794374048872912, "learning_rate": 5.369752199203702e-06, "loss": 0.3752, "loss_nan_ranks": 0, "loss_rank_avg": 0.36548617482185364, "step": 2945, "valid_targets_mean": 2390.0, "valid_targets_min": 1189 }, { "epoch": 4.72, "grad_norm": 0.6481855604133522, "learning_rate": 5.306443440013171e-06, "loss": 0.3643, "loss_nan_ranks": 0, "loss_rank_avg": 0.36283496022224426, "step": 2950, "valid_targets_mean": 2627.6, "valid_targets_min": 1706 }, { "epoch": 4.728, "grad_norm": 0.6283996969199868, "learning_rate": 5.243452967415155e-06, "loss": 0.3517, "loss_nan_ranks": 0, "loss_rank_avg": 0.34274983406066895, "step": 2955, "valid_targets_mean": 2579.9, "valid_targets_min": 570 }, { "epoch": 4.736, "grad_norm": 0.6221382437380744, "learning_rate": 5.180782145886846e-06, "loss": 0.3735, "loss_nan_ranks": 0, "loss_rank_avg": 0.3483470678329468, "step": 2960, "valid_targets_mean": 2586.2, "valid_targets_min": 858 }, { "epoch": 4.744, "grad_norm": 0.6284889980995106, "learning_rate": 5.118432332981273e-06, "loss": 0.3877, "loss_nan_ranks": 0, "loss_rank_avg": 0.36552244424819946, "step": 2965, "valid_targets_mean": 2559.1, "valid_targets_min": 956 }, { "epoch": 4.752, "grad_norm": 0.6678458055329841, "learning_rate": 5.056404879297887e-06, "loss": 0.3507, "loss_nan_ranks": 0, "loss_rank_avg": 0.34571802616119385, "step": 2970, "valid_targets_mean": 2412.1, "valid_targets_min": 644 }, { "epoch": 4.76, "grad_norm": 0.6924660368387833, "learning_rate": 4.994701128453325e-06, "loss": 0.3617, "loss_nan_ranks": 0, "loss_rank_avg": 0.39678865671157837, "step": 2975, "valid_targets_mean": 2279.1, "valid_targets_min": 1016 }, { "epoch": 4.768, "grad_norm": 0.7809541899728214, "learning_rate": 4.933322417052269e-06, "loss": 0.3692, "loss_nan_ranks": 0, "loss_rank_avg": 0.3663102388381958, "step": 2980, "valid_targets_mean": 2255.8, "valid_targets_min": 637 }, { "epoch": 4.776, "grad_norm": 0.5751616696088279, "learning_rate": 4.8722700746585135e-06, "loss": 0.3519, "loss_nan_ranks": 0, "loss_rank_avg": 0.30202430486679077, "step": 2985, "valid_targets_mean": 2724.5, "valid_targets_min": 507 }, { "epoch": 4.784, "grad_norm": 0.7486775531571893, "learning_rate": 4.811545423766184e-06, "loss": 0.3779, "loss_nan_ranks": 0, "loss_rank_avg": 0.39271706342697144, "step": 2990, "valid_targets_mean": 1848.8, "valid_targets_min": 691 }, { "epoch": 4.792, "grad_norm": 0.6704024436269385, "learning_rate": 4.75114977977104e-06, "loss": 0.3689, "loss_nan_ranks": 0, "loss_rank_avg": 0.35405421257019043, "step": 2995, "valid_targets_mean": 2175.5, "valid_targets_min": 626 }, { "epoch": 4.8, "grad_norm": 0.6512417198114752, "learning_rate": 4.691084450942047e-06, "loss": 0.3515, "loss_nan_ranks": 0, "loss_rank_avg": 0.35529059171676636, "step": 3000, "valid_targets_mean": 2353.8, "valid_targets_min": 969 }, { "epoch": 4.808, "grad_norm": 0.6723125318736338, "learning_rate": 4.631350738392955e-06, "loss": 0.3671, "loss_nan_ranks": 0, "loss_rank_avg": 0.3929918110370636, "step": 3005, "valid_targets_mean": 2460.7, "valid_targets_min": 749 }, { "epoch": 4.816, "grad_norm": 0.6545635253453368, "learning_rate": 4.571949936054197e-06, "loss": 0.35, "loss_nan_ranks": 0, "loss_rank_avg": 0.3621842861175537, "step": 3010, "valid_targets_mean": 2419.5, "valid_targets_min": 1052 }, { "epoch": 4.824, "grad_norm": 0.8060013677435842, "learning_rate": 4.512883330644815e-06, "loss": 0.3816, "loss_nan_ranks": 0, "loss_rank_avg": 0.370906263589859, "step": 3015, "valid_targets_mean": 2090.8, "valid_targets_min": 762 }, { "epoch": 4.832, "grad_norm": 0.785168334536275, "learning_rate": 4.454152201644591e-06, "loss": 0.3872, "loss_nan_ranks": 0, "loss_rank_avg": 0.394753634929657, "step": 3020, "valid_targets_mean": 1937.4, "valid_targets_min": 601 }, { "epoch": 4.84, "grad_norm": 0.6806318836309488, "learning_rate": 4.395757821266333e-06, "loss": 0.3591, "loss_nan_ranks": 0, "loss_rank_avg": 0.36405467987060547, "step": 3025, "valid_targets_mean": 2313.8, "valid_targets_min": 612 }, { "epoch": 4.848, "grad_norm": 0.7058728895084995, "learning_rate": 4.337701454428322e-06, "loss": 0.3654, "loss_nan_ranks": 0, "loss_rank_avg": 0.40474677085876465, "step": 3030, "valid_targets_mean": 2222.8, "valid_targets_min": 669 }, { "epoch": 4.856, "grad_norm": 0.6290075033695414, "learning_rate": 4.279984358726925e-06, "loss": 0.3746, "loss_nan_ranks": 0, "loss_rank_avg": 0.3571993112564087, "step": 3035, "valid_targets_mean": 2527.0, "valid_targets_min": 871 }, { "epoch": 4.864, "grad_norm": 0.6736298727323675, "learning_rate": 4.2226077844093205e-06, "loss": 0.3614, "loss_nan_ranks": 0, "loss_rank_avg": 0.3846289813518524, "step": 3040, "valid_targets_mean": 2610.7, "valid_targets_min": 1160 }, { "epoch": 4.872, "grad_norm": 0.6196230514188242, "learning_rate": 4.165572974346435e-06, "loss": 0.355, "loss_nan_ranks": 0, "loss_rank_avg": 0.354499489068985, "step": 3045, "valid_targets_mean": 2464.2, "valid_targets_min": 666 }, { "epoch": 4.88, "grad_norm": 0.6198778408734426, "learning_rate": 4.108881164006033e-06, "loss": 0.3563, "loss_nan_ranks": 0, "loss_rank_avg": 0.347675085067749, "step": 3050, "valid_targets_mean": 2617.9, "valid_targets_min": 728 }, { "epoch": 4.888, "grad_norm": 0.6632240636048407, "learning_rate": 4.05253358142593e-06, "loss": 0.3588, "loss_nan_ranks": 0, "loss_rank_avg": 0.3727016746997833, "step": 3055, "valid_targets_mean": 2414.1, "valid_targets_min": 694 }, { "epoch": 4.896, "grad_norm": 0.6322639539099554, "learning_rate": 3.9965314471874035e-06, "loss": 0.3576, "loss_nan_ranks": 0, "loss_rank_avg": 0.3659431040287018, "step": 3060, "valid_targets_mean": 2477.1, "valid_targets_min": 924 }, { "epoch": 4.904, "grad_norm": 0.6736041760480485, "learning_rate": 3.940875974388749e-06, "loss": 0.3732, "loss_nan_ranks": 0, "loss_rank_avg": 0.34606072306632996, "step": 3065, "valid_targets_mean": 2141.9, "valid_targets_min": 442 }, { "epoch": 4.912, "grad_norm": 0.664830451092538, "learning_rate": 3.885568368619013e-06, "loss": 0.3626, "loss_nan_ranks": 0, "loss_rank_avg": 0.3804055452346802, "step": 3070, "valid_targets_mean": 2583.8, "valid_targets_min": 943 }, { "epoch": 4.92, "grad_norm": 0.6231360141274791, "learning_rate": 3.830609827931877e-06, "loss": 0.3684, "loss_nan_ranks": 0, "loss_rank_avg": 0.3745899796485901, "step": 3075, "valid_targets_mean": 2623.6, "valid_targets_min": 1711 }, { "epoch": 4.928, "grad_norm": 0.6575635297911601, "learning_rate": 3.7760015428196694e-06, "loss": 0.3598, "loss_nan_ranks": 0, "loss_rank_avg": 0.3489200174808502, "step": 3080, "valid_targets_mean": 2478.5, "valid_targets_min": 977 }, { "epoch": 4.936, "grad_norm": 0.6571068920250964, "learning_rate": 3.7217446961876413e-06, "loss": 0.3716, "loss_nan_ranks": 0, "loss_rank_avg": 0.36605334281921387, "step": 3085, "valid_targets_mean": 2625.0, "valid_targets_min": 1006 }, { "epoch": 4.944, "grad_norm": 0.6474105338007999, "learning_rate": 3.6678404633282826e-06, "loss": 0.37, "loss_nan_ranks": 0, "loss_rank_avg": 0.37614741921424866, "step": 3090, "valid_targets_mean": 2364.7, "valid_targets_min": 1127 }, { "epoch": 4.952, "grad_norm": 0.6552527098199069, "learning_rate": 3.6142900118959158e-06, "loss": 0.367, "loss_nan_ranks": 0, "loss_rank_avg": 0.37575167417526245, "step": 3095, "valid_targets_mean": 2360.6, "valid_targets_min": 685 }, { "epoch": 4.96, "grad_norm": 0.7523853247297415, "learning_rate": 3.561094501881339e-06, "loss": 0.3693, "loss_nan_ranks": 0, "loss_rank_avg": 0.3789512515068054, "step": 3100, "valid_targets_mean": 1838.3, "valid_targets_min": 578 }, { "epoch": 4.968, "grad_norm": 0.704782833537084, "learning_rate": 3.5082550855867693e-06, "loss": 0.3831, "loss_nan_ranks": 0, "loss_rank_avg": 0.3671078085899353, "step": 3105, "valid_targets_mean": 2336.1, "valid_targets_min": 1135 }, { "epoch": 4.976, "grad_norm": 1.0492112217903906, "learning_rate": 3.455772907600841e-06, "loss": 0.3604, "loss_nan_ranks": 0, "loss_rank_avg": 0.3749602437019348, "step": 3110, "valid_targets_mean": 2047.2, "valid_targets_min": 800 }, { "epoch": 4.984, "grad_norm": 0.7745551022530366, "learning_rate": 3.4036491047738075e-06, "loss": 0.3736, "loss_nan_ranks": 0, "loss_rank_avg": 0.37036004662513733, "step": 3115, "valid_targets_mean": 2268.4, "valid_targets_min": 985 }, { "epoch": 4.992, "grad_norm": 0.6234015250941393, "learning_rate": 3.351884806192933e-06, "loss": 0.3551, "loss_nan_ranks": 0, "loss_rank_avg": 0.35156768560409546, "step": 3120, "valid_targets_mean": 2443.3, "valid_targets_min": 654 }, { "epoch": 5.0, "grad_norm": 0.672594729267636, "learning_rate": 3.3004811331580268e-06, "loss": 0.3609, "loss_nan_ranks": 0, "loss_rank_avg": 0.3615241050720215, "step": 3125, "valid_targets_mean": 2253.5, "valid_targets_min": 645 }, { "epoch": 5.008, "grad_norm": 0.6125356775058436, "learning_rate": 3.249439199157167e-06, "loss": 0.3575, "loss_nan_ranks": 0, "loss_rank_avg": 0.3192368745803833, "step": 3130, "valid_targets_mean": 2929.1, "valid_targets_min": 1322 }, { "epoch": 5.016, "grad_norm": 0.6488989071092053, "learning_rate": 3.198760109842558e-06, "loss": 0.3805, "loss_nan_ranks": 0, "loss_rank_avg": 0.344971626996994, "step": 3135, "valid_targets_mean": 2227.6, "valid_targets_min": 608 }, { "epoch": 5.024, "grad_norm": 0.6156448086036765, "learning_rate": 3.1484449630065894e-06, "loss": 0.3605, "loss_nan_ranks": 0, "loss_rank_avg": 0.3354613482952118, "step": 3140, "valid_targets_mean": 2545.4, "valid_targets_min": 796 }, { "epoch": 5.032, "grad_norm": 0.719895670722136, "learning_rate": 3.0984948485580736e-06, "loss": 0.3618, "loss_nan_ranks": 0, "loss_rank_avg": 0.3566667437553406, "step": 3145, "valid_targets_mean": 2109.3, "valid_targets_min": 763 }, { "epoch": 5.04, "grad_norm": 0.614850938651707, "learning_rate": 3.048910848498605e-06, "loss": 0.3521, "loss_nan_ranks": 0, "loss_rank_avg": 0.3528083264827728, "step": 3150, "valid_targets_mean": 2628.0, "valid_targets_min": 630 }, { "epoch": 5.048, "grad_norm": 0.7790162967880888, "learning_rate": 2.9996940368991477e-06, "loss": 0.3408, "loss_nan_ranks": 0, "loss_rank_avg": 0.3702874779701233, "step": 3155, "valid_targets_mean": 2356.4, "valid_targets_min": 868 }, { "epoch": 5.056, "grad_norm": 0.6660344390557594, "learning_rate": 2.9508454798767516e-06, "loss": 0.3633, "loss_nan_ranks": 0, "loss_rank_avg": 0.35248178243637085, "step": 3160, "valid_targets_mean": 2445.2, "valid_targets_min": 1027 }, { "epoch": 5.064, "grad_norm": 0.638861478860897, "learning_rate": 2.9023662355714766e-06, "loss": 0.3676, "loss_nan_ranks": 0, "loss_rank_avg": 0.3651914894580841, "step": 3165, "valid_targets_mean": 2576.9, "valid_targets_min": 841 }, { "epoch": 5.072, "grad_norm": 0.5988532849444926, "learning_rate": 2.8542573541234707e-06, "loss": 0.3577, "loss_nan_ranks": 0, "loss_rank_avg": 0.3110966682434082, "step": 3170, "valid_targets_mean": 3015.4, "valid_targets_min": 800 }, { "epoch": 5.08, "grad_norm": 0.6054079388440893, "learning_rate": 2.80651987765018e-06, "loss": 0.361, "loss_nan_ranks": 0, "loss_rank_avg": 0.3328086733818054, "step": 3175, "valid_targets_mean": 2782.4, "valid_targets_min": 701 }, { "epoch": 5.088, "grad_norm": 0.7338578008268197, "learning_rate": 2.759154840223843e-06, "loss": 0.3517, "loss_nan_ranks": 0, "loss_rank_avg": 0.36060547828674316, "step": 3180, "valid_targets_mean": 1945.8, "valid_targets_min": 719 }, { "epoch": 5.096, "grad_norm": 0.6685601535146233, "learning_rate": 2.7121632678490327e-06, "loss": 0.3578, "loss_nan_ranks": 0, "loss_rank_avg": 0.3761441111564636, "step": 3185, "valid_targets_mean": 2452.0, "valid_targets_min": 948 }, { "epoch": 5.104, "grad_norm": 0.7688881744820886, "learning_rate": 2.6655461784404768e-06, "loss": 0.3548, "loss_nan_ranks": 0, "loss_rank_avg": 0.37389665842056274, "step": 3190, "valid_targets_mean": 1935.4, "valid_targets_min": 648 }, { "epoch": 5.112, "grad_norm": 0.6905310599554686, "learning_rate": 2.6193045818009654e-06, "loss": 0.3617, "loss_nan_ranks": 0, "loss_rank_avg": 0.36182960867881775, "step": 3195, "valid_targets_mean": 2116.4, "valid_targets_min": 689 }, { "epoch": 5.12, "grad_norm": 0.6320635267289159, "learning_rate": 2.5734394795995066e-06, "loss": 0.3624, "loss_nan_ranks": 0, "loss_rank_avg": 0.36028727889060974, "step": 3200, "valid_targets_mean": 2626.6, "valid_targets_min": 1241 }, { "epoch": 5.128, "grad_norm": 0.6607522571681456, "learning_rate": 2.5279518653496272e-06, "loss": 0.3599, "loss_nan_ranks": 0, "loss_rank_avg": 0.35482197999954224, "step": 3205, "valid_targets_mean": 2485.2, "valid_targets_min": 669 }, { "epoch": 5.136, "grad_norm": 0.6696357677993036, "learning_rate": 2.4828427243878307e-06, "loss": 0.3585, "loss_nan_ranks": 0, "loss_rank_avg": 0.33945372700691223, "step": 3210, "valid_targets_mean": 2405.8, "valid_targets_min": 741 }, { "epoch": 5.144, "grad_norm": 0.7047794366612183, "learning_rate": 2.4381130338522762e-06, "loss": 0.361, "loss_nan_ranks": 0, "loss_rank_avg": 0.37273550033569336, "step": 3215, "valid_targets_mean": 2244.8, "valid_targets_min": 1182 }, { "epoch": 5.152, "grad_norm": 0.6564416259617804, "learning_rate": 2.393763762661596e-06, "loss": 0.3526, "loss_nan_ranks": 0, "loss_rank_avg": 0.3642486035823822, "step": 3220, "valid_targets_mean": 2322.0, "valid_targets_min": 1177 }, { "epoch": 5.16, "grad_norm": 0.6181350427372484, "learning_rate": 2.349795871493925e-06, "loss": 0.3512, "loss_nan_ranks": 0, "loss_rank_avg": 0.33632394671440125, "step": 3225, "valid_targets_mean": 2635.9, "valid_targets_min": 525 }, { "epoch": 5.168, "grad_norm": 0.6406934295103374, "learning_rate": 2.3062103127660686e-06, "loss": 0.366, "loss_nan_ranks": 0, "loss_rank_avg": 0.36425334215164185, "step": 3230, "valid_targets_mean": 2557.0, "valid_targets_min": 1307 }, { "epoch": 5.176, "grad_norm": 0.6165343586266914, "learning_rate": 2.2630080306128833e-06, "loss": 0.3646, "loss_nan_ranks": 0, "loss_rank_avg": 0.32171159982681274, "step": 3235, "valid_targets_mean": 2622.6, "valid_targets_min": 981 }, { "epoch": 5.184, "grad_norm": 0.6892880210277768, "learning_rate": 2.2201899608668365e-06, "loss": 0.3599, "loss_nan_ranks": 0, "loss_rank_avg": 0.3722003400325775, "step": 3240, "valid_targets_mean": 2352.9, "valid_targets_min": 538 }, { "epoch": 5.192, "grad_norm": 0.6791780801679136, "learning_rate": 2.1777570310377084e-06, "loss": 0.3464, "loss_nan_ranks": 0, "loss_rank_avg": 0.3598274290561676, "step": 3245, "valid_targets_mean": 2443.2, "valid_targets_min": 662 }, { "epoch": 5.2, "grad_norm": 0.6806863637030199, "learning_rate": 2.1357101602925323e-06, "loss": 0.3593, "loss_nan_ranks": 0, "loss_rank_avg": 0.35902658104896545, "step": 3250, "valid_targets_mean": 2207.4, "valid_targets_min": 617 }, { "epoch": 5.208, "grad_norm": 0.7316316341641499, "learning_rate": 2.0940502594356427e-06, "loss": 0.3789, "loss_nan_ranks": 0, "loss_rank_avg": 0.37861448526382446, "step": 3255, "valid_targets_mean": 2340.1, "valid_targets_min": 597 }, { "epoch": 5.216, "grad_norm": 0.7882908631165876, "learning_rate": 2.052778230888994e-06, "loss": 0.3552, "loss_nan_ranks": 0, "loss_rank_avg": 0.386216402053833, "step": 3260, "valid_targets_mean": 1980.9, "valid_targets_min": 539 }, { "epoch": 5.224, "grad_norm": 0.6483668844257544, "learning_rate": 2.0118949686725786e-06, "loss": 0.3598, "loss_nan_ranks": 0, "loss_rank_avg": 0.3690330386161804, "step": 3265, "valid_targets_mean": 2679.3, "valid_targets_min": 1039 }, { "epoch": 5.232, "grad_norm": 0.6787408597631184, "learning_rate": 1.971401358385072e-06, "loss": 0.3596, "loss_nan_ranks": 0, "loss_rank_avg": 0.3550987243652344, "step": 3270, "valid_targets_mean": 2274.2, "valid_targets_min": 712 }, { "epoch": 5.24, "grad_norm": 0.7569513313755559, "learning_rate": 1.9312982771846435e-06, "loss": 0.3634, "loss_nan_ranks": 0, "loss_rank_avg": 0.4026515483856201, "step": 3275, "valid_targets_mean": 1947.2, "valid_targets_min": 684 }, { "epoch": 5.248, "grad_norm": 0.7060321478005621, "learning_rate": 1.8915865937699652e-06, "loss": 0.3553, "loss_nan_ranks": 0, "loss_rank_avg": 0.3544864058494568, "step": 3280, "valid_targets_mean": 2002.1, "valid_targets_min": 710 }, { "epoch": 5.256, "grad_norm": 0.5588815510320256, "learning_rate": 1.8522671683613946e-06, "loss": 0.351, "loss_nan_ranks": 0, "loss_rank_avg": 0.3070131540298462, "step": 3285, "valid_targets_mean": 2963.2, "valid_targets_min": 661 }, { "epoch": 5.264, "grad_norm": 0.7256277028506886, "learning_rate": 1.8133408526823283e-06, "loss": 0.3662, "loss_nan_ranks": 0, "loss_rank_avg": 0.38448530435562134, "step": 3290, "valid_targets_mean": 2194.4, "valid_targets_min": 698 }, { "epoch": 5.272, "grad_norm": 0.6579211667621234, "learning_rate": 1.7748084899407558e-06, "loss": 0.356, "loss_nan_ranks": 0, "loss_rank_avg": 0.3459991216659546, "step": 3295, "valid_targets_mean": 2373.1, "valid_targets_min": 787 }, { "epoch": 5.28, "grad_norm": 0.6820255470210079, "learning_rate": 1.7366709148110118e-06, "loss": 0.3605, "loss_nan_ranks": 0, "loss_rank_avg": 0.36668434739112854, "step": 3300, "valid_targets_mean": 2471.6, "valid_targets_min": 1419 }, { "epoch": 5.288, "grad_norm": 0.7082042585335822, "learning_rate": 1.698928953415675e-06, "loss": 0.3618, "loss_nan_ranks": 0, "loss_rank_avg": 0.3762280344963074, "step": 3305, "valid_targets_mean": 2216.0, "valid_targets_min": 774 }, { "epoch": 5.296, "grad_norm": 0.777879323614007, "learning_rate": 1.6615834233076756e-06, "loss": 0.3431, "loss_nan_ranks": 0, "loss_rank_avg": 0.39311477541923523, "step": 3310, "valid_targets_mean": 1915.2, "valid_targets_min": 622 }, { "epoch": 5.304, "grad_norm": 0.667224442472971, "learning_rate": 1.6246351334525944e-06, "loss": 0.3606, "loss_nan_ranks": 0, "loss_rank_avg": 0.35362792015075684, "step": 3315, "valid_targets_mean": 2345.8, "valid_targets_min": 604 }, { "epoch": 5.312, "grad_norm": 0.6727530950210006, "learning_rate": 1.5880848842111362e-06, "loss": 0.3642, "loss_nan_ranks": 0, "loss_rank_avg": 0.3399882912635803, "step": 3320, "valid_targets_mean": 2341.9, "valid_targets_min": 592 }, { "epoch": 5.32, "grad_norm": 0.6695401723963949, "learning_rate": 1.5519334673218023e-06, "loss": 0.3546, "loss_nan_ranks": 0, "loss_rank_avg": 0.35643112659454346, "step": 3325, "valid_targets_mean": 2406.8, "valid_targets_min": 746 }, { "epoch": 5.328, "grad_norm": 0.6513531697775429, "learning_rate": 1.5161816658837002e-06, "loss": 0.3472, "loss_nan_ranks": 0, "loss_rank_avg": 0.3052400052547455, "step": 3330, "valid_targets_mean": 2647.8, "valid_targets_min": 622 }, { "epoch": 5.336, "grad_norm": 0.7766971137424113, "learning_rate": 1.4808302543396423e-06, "loss": 0.3492, "loss_nan_ranks": 0, "loss_rank_avg": 0.37005147337913513, "step": 3335, "valid_targets_mean": 2038.7, "valid_targets_min": 634 }, { "epoch": 5.344, "grad_norm": 0.7026028192917165, "learning_rate": 1.445879998459314e-06, "loss": 0.3479, "loss_nan_ranks": 0, "loss_rank_avg": 0.34184879064559937, "step": 3340, "valid_targets_mean": 2478.4, "valid_targets_min": 590 }, { "epoch": 5.352, "grad_norm": 0.644580097683027, "learning_rate": 1.4113316553227296e-06, "loss": 0.3467, "loss_nan_ranks": 0, "loss_rank_avg": 0.3371330499649048, "step": 3345, "valid_targets_mean": 2657.6, "valid_targets_min": 1401 }, { "epoch": 5.36, "grad_norm": 0.705372710572543, "learning_rate": 1.3771859733037896e-06, "loss": 0.3447, "loss_nan_ranks": 0, "loss_rank_avg": 0.3721950054168701, "step": 3350, "valid_targets_mean": 2235.4, "valid_targets_min": 753 }, { "epoch": 5.368, "grad_norm": 0.7041286066838948, "learning_rate": 1.3434436920541072e-06, "loss": 0.3619, "loss_nan_ranks": 0, "loss_rank_avg": 0.36865848302841187, "step": 3355, "valid_targets_mean": 2148.9, "valid_targets_min": 992 }, { "epoch": 5.376, "grad_norm": 0.6598927398730162, "learning_rate": 1.3101055424869768e-06, "loss": 0.3491, "loss_nan_ranks": 0, "loss_rank_avg": 0.36260682344436646, "step": 3360, "valid_targets_mean": 2419.1, "valid_targets_min": 906 }, { "epoch": 5.384, "grad_norm": 0.7943332000313966, "learning_rate": 1.2771722467615266e-06, "loss": 0.3666, "loss_nan_ranks": 0, "loss_rank_avg": 0.384222149848938, "step": 3365, "valid_targets_mean": 1807.8, "valid_targets_min": 511 }, { "epoch": 5.392, "grad_norm": 0.6283790962992163, "learning_rate": 1.2446445182670818e-06, "loss": 0.3855, "loss_nan_ranks": 0, "loss_rank_avg": 0.3594076633453369, "step": 3370, "valid_targets_mean": 2752.9, "valid_targets_min": 715 }, { "epoch": 5.4, "grad_norm": 0.6308888988033152, "learning_rate": 1.21252306160772e-06, "loss": 0.3579, "loss_nan_ranks": 0, "loss_rank_avg": 0.3309343457221985, "step": 3375, "valid_targets_mean": 2615.4, "valid_targets_min": 1060 }, { "epoch": 5.408, "grad_norm": 0.6212982070573271, "learning_rate": 1.1808085725870088e-06, "loss": 0.357, "loss_nan_ranks": 0, "loss_rank_avg": 0.3432900011539459, "step": 3380, "valid_targets_mean": 2605.8, "valid_targets_min": 1208 }, { "epoch": 5.416, "grad_norm": 0.6325306117951953, "learning_rate": 1.1495017381929197e-06, "loss": 0.3568, "loss_nan_ranks": 0, "loss_rank_avg": 0.34718793630599976, "step": 3385, "valid_targets_mean": 2651.6, "valid_targets_min": 1221 }, { "epoch": 5.424, "grad_norm": 0.6551884609744554, "learning_rate": 1.1186032365829514e-06, "loss": 0.3524, "loss_nan_ranks": 0, "loss_rank_avg": 0.3209971487522125, "step": 3390, "valid_targets_mean": 2383.6, "valid_targets_min": 830 }, { "epoch": 5.432, "grad_norm": 0.6861395147132316, "learning_rate": 1.088113737069456e-06, "loss": 0.3616, "loss_nan_ranks": 0, "loss_rank_avg": 0.3504214286804199, "step": 3395, "valid_targets_mean": 2286.2, "valid_targets_min": 627 }, { "epoch": 5.44, "grad_norm": 0.7142301912993754, "learning_rate": 1.0580339001051153e-06, "loss": 0.3658, "loss_nan_ranks": 0, "loss_rank_avg": 0.4048931300640106, "step": 3400, "valid_targets_mean": 2164.6, "valid_targets_min": 1069 }, { "epoch": 5.448, "grad_norm": 0.6852209256299653, "learning_rate": 1.0283643772686535e-06, "loss": 0.3476, "loss_nan_ranks": 0, "loss_rank_avg": 0.3515816926956177, "step": 3405, "valid_targets_mean": 2186.8, "valid_targets_min": 535 }, { "epoch": 5.456, "grad_norm": 0.7376221866961876, "learning_rate": 9.991058112507113e-07, "loss": 0.3505, "loss_nan_ranks": 0, "loss_rank_avg": 0.40202945470809937, "step": 3410, "valid_targets_mean": 2289.1, "valid_targets_min": 779 }, { "epoch": 5.464, "grad_norm": 0.6144769599130704, "learning_rate": 9.702588358399345e-07, "loss": 0.3583, "loss_nan_ranks": 0, "loss_rank_avg": 0.3659561276435852, "step": 3415, "valid_targets_mean": 2851.7, "valid_targets_min": 847 }, { "epoch": 5.4719999999999995, "grad_norm": 0.6817618706864722, "learning_rate": 9.418240759092434e-07, "loss": 0.3637, "loss_nan_ranks": 0, "loss_rank_avg": 0.3654489517211914, "step": 3420, "valid_targets_mean": 2237.9, "valid_targets_min": 652 }, { "epoch": 5.48, "grad_norm": 0.6663119774773926, "learning_rate": 9.138021474022763e-07, "loss": 0.3598, "loss_nan_ranks": 0, "loss_rank_avg": 0.37460604310035706, "step": 3425, "valid_targets_mean": 2327.5, "valid_targets_min": 620 }, { "epoch": 5.4879999999999995, "grad_norm": 0.6716553399135632, "learning_rate": 8.861936573200825e-07, "loss": 0.363, "loss_nan_ranks": 0, "loss_rank_avg": 0.3525908589363098, "step": 3430, "valid_targets_mean": 2574.8, "valid_targets_min": 1182 }, { "epoch": 5.496, "grad_norm": 0.6735504443339394, "learning_rate": 8.58999203707942e-07, "loss": 0.3678, "loss_nan_ranks": 0, "loss_rank_avg": 0.38089731335639954, "step": 3435, "valid_targets_mean": 2493.9, "valid_targets_min": 883 }, { "epoch": 5.504, "grad_norm": 0.663972336719367, "learning_rate": 8.322193756424401e-07, "loss": 0.3465, "loss_nan_ranks": 0, "loss_rank_avg": 0.34215810894966125, "step": 3440, "valid_targets_mean": 2357.2, "valid_targets_min": 812 }, { "epoch": 5.5120000000000005, "grad_norm": 0.6192143246660865, "learning_rate": 8.058547532186667e-07, "loss": 0.3542, "loss_nan_ranks": 0, "loss_rank_avg": 0.3521275520324707, "step": 3445, "valid_targets_mean": 2753.1, "valid_targets_min": 1714 }, { "epoch": 5.52, "grad_norm": 0.6953540634211431, "learning_rate": 7.799059075376991e-07, "loss": 0.3623, "loss_nan_ranks": 0, "loss_rank_avg": 0.3627205193042755, "step": 3450, "valid_targets_mean": 2349.4, "valid_targets_min": 544 }, { "epoch": 5.5280000000000005, "grad_norm": 0.6802926606310232, "learning_rate": 7.54373400694195e-07, "loss": 0.3546, "loss_nan_ranks": 0, "loss_rank_avg": 0.3609620928764343, "step": 3455, "valid_targets_mean": 2452.8, "valid_targets_min": 782 }, { "epoch": 5.536, "grad_norm": 0.6634915525384147, "learning_rate": 7.292577857642302e-07, "loss": 0.357, "loss_nan_ranks": 0, "loss_rank_avg": 0.3651014566421509, "step": 3460, "valid_targets_mean": 2629.6, "valid_targets_min": 761 }, { "epoch": 5.5440000000000005, "grad_norm": 0.63417551257205, "learning_rate": 7.045596067933158e-07, "loss": 0.3587, "loss_nan_ranks": 0, "loss_rank_avg": 0.32456403970718384, "step": 3465, "valid_targets_mean": 2452.6, "valid_targets_min": 915 }, { "epoch": 5.552, "grad_norm": 0.5803948039648736, "learning_rate": 6.80279398784609e-07, "loss": 0.3381, "loss_nan_ranks": 0, "loss_rank_avg": 0.31592857837677, "step": 3470, "valid_targets_mean": 2696.3, "valid_targets_min": 1633 }, { "epoch": 5.5600000000000005, "grad_norm": 0.6857358254181065, "learning_rate": 6.56417687687343e-07, "loss": 0.3387, "loss_nan_ranks": 0, "loss_rank_avg": 0.34818774461746216, "step": 3475, "valid_targets_mean": 2154.4, "valid_targets_min": 548 }, { "epoch": 5.568, "grad_norm": 0.6581872551853069, "learning_rate": 6.329749903854066e-07, "loss": 0.3421, "loss_nan_ranks": 0, "loss_rank_avg": 0.3282102942466736, "step": 3480, "valid_targets_mean": 2525.1, "valid_targets_min": 747 }, { "epoch": 5.576, "grad_norm": 0.7037981685088222, "learning_rate": 6.099518146861628e-07, "loss": 0.3565, "loss_nan_ranks": 0, "loss_rank_avg": 0.34629759192466736, "step": 3485, "valid_targets_mean": 2143.9, "valid_targets_min": 948 }, { "epoch": 5.584, "grad_norm": 0.6977519908519918, "learning_rate": 5.873486593094546e-07, "loss": 0.3591, "loss_nan_ranks": 0, "loss_rank_avg": 0.3663643002510071, "step": 3490, "valid_targets_mean": 2090.2, "valid_targets_min": 631 }, { "epoch": 5.592, "grad_norm": 0.612283944786415, "learning_rate": 5.651660138767834e-07, "loss": 0.3576, "loss_nan_ranks": 0, "loss_rank_avg": 0.3275211453437805, "step": 3495, "valid_targets_mean": 2624.4, "valid_targets_min": 698 }, { "epoch": 5.6, "grad_norm": 0.751829222915938, "learning_rate": 5.434043589007232e-07, "loss": 0.3413, "loss_nan_ranks": 0, "loss_rank_avg": 0.3469979763031006, "step": 3500, "valid_targets_mean": 2564.5, "valid_targets_min": 924 }, { "epoch": 5.608, "grad_norm": 0.7027860247672669, "learning_rate": 5.220641657744963e-07, "loss": 0.3619, "loss_nan_ranks": 0, "loss_rank_avg": 0.36408111453056335, "step": 3505, "valid_targets_mean": 2274.8, "valid_targets_min": 618 }, { "epoch": 5.616, "grad_norm": 0.6902304457311933, "learning_rate": 5.0114589676177e-07, "loss": 0.3622, "loss_nan_ranks": 0, "loss_rank_avg": 0.36030685901641846, "step": 3510, "valid_targets_mean": 2209.7, "valid_targets_min": 948 }, { "epoch": 5.624, "grad_norm": 0.6614672483667304, "learning_rate": 4.806500049866492e-07, "loss": 0.3462, "loss_nan_ranks": 0, "loss_rank_avg": 0.3510351777076721, "step": 3515, "valid_targets_mean": 2432.2, "valid_targets_min": 842 }, { "epoch": 5.632, "grad_norm": 0.6800892158996101, "learning_rate": 4.6057693442383756e-07, "loss": 0.3569, "loss_nan_ranks": 0, "loss_rank_avg": 0.3634949028491974, "step": 3520, "valid_targets_mean": 2441.5, "valid_targets_min": 720 }, { "epoch": 5.64, "grad_norm": 0.6769618683554501, "learning_rate": 4.409271198890519e-07, "loss": 0.3484, "loss_nan_ranks": 0, "loss_rank_avg": 0.37198689579963684, "step": 3525, "valid_targets_mean": 2369.9, "valid_targets_min": 1527 }, { "epoch": 5.648, "grad_norm": 0.666362195969312, "learning_rate": 4.217009870295763e-07, "loss": 0.3497, "loss_nan_ranks": 0, "loss_rank_avg": 0.3538084626197815, "step": 3530, "valid_targets_mean": 2409.4, "valid_targets_min": 1270 }, { "epoch": 5.656, "grad_norm": 0.6739195979135522, "learning_rate": 4.028989523150628e-07, "loss": 0.3516, "loss_nan_ranks": 0, "loss_rank_avg": 0.3414755165576935, "step": 3535, "valid_targets_mean": 2567.1, "valid_targets_min": 691 }, { "epoch": 5.664, "grad_norm": 0.7059314292395321, "learning_rate": 3.8452142302849216e-07, "loss": 0.3524, "loss_nan_ranks": 0, "loss_rank_avg": 0.3656729459762573, "step": 3540, "valid_targets_mean": 2145.8, "valid_targets_min": 699 }, { "epoch": 5.672, "grad_norm": 0.7327175619232137, "learning_rate": 3.665687972573606e-07, "loss": 0.3554, "loss_nan_ranks": 0, "loss_rank_avg": 0.33814215660095215, "step": 3545, "valid_targets_mean": 1918.9, "valid_targets_min": 706 }, { "epoch": 5.68, "grad_norm": 0.6698574435915802, "learning_rate": 3.4904146388506475e-07, "loss": 0.3644, "loss_nan_ranks": 0, "loss_rank_avg": 0.371431827545166, "step": 3550, "valid_targets_mean": 2439.1, "valid_targets_min": 1297 }, { "epoch": 5.688, "grad_norm": 0.6426157118596137, "learning_rate": 3.319398025824572e-07, "loss": 0.3686, "loss_nan_ranks": 0, "loss_rank_avg": 0.36349034309387207, "step": 3555, "valid_targets_mean": 2658.6, "valid_targets_min": 833 }, { "epoch": 5.696, "grad_norm": 0.7173898048768296, "learning_rate": 3.152641837996373e-07, "loss": 0.3569, "loss_nan_ranks": 0, "loss_rank_avg": 0.3507586121559143, "step": 3560, "valid_targets_mean": 2054.9, "valid_targets_min": 876 }, { "epoch": 5.704, "grad_norm": 0.6703837137982286, "learning_rate": 2.990149687579247e-07, "loss": 0.3426, "loss_nan_ranks": 0, "loss_rank_avg": 0.3759458661079407, "step": 3565, "valid_targets_mean": 2434.6, "valid_targets_min": 1637 }, { "epoch": 5.712, "grad_norm": 0.6625498990103753, "learning_rate": 2.8319250944203625e-07, "loss": 0.3527, "loss_nan_ranks": 0, "loss_rank_avg": 0.31836962699890137, "step": 3570, "valid_targets_mean": 2958.1, "valid_targets_min": 1197 }, { "epoch": 5.72, "grad_norm": 0.655529717895589, "learning_rate": 2.677971485924502e-07, "loss": 0.36, "loss_nan_ranks": 0, "loss_rank_avg": 0.3503941297531128, "step": 3575, "valid_targets_mean": 2468.9, "valid_targets_min": 645 }, { "epoch": 5.728, "grad_norm": 0.6056215587391824, "learning_rate": 2.52829219697992e-07, "loss": 0.3554, "loss_nan_ranks": 0, "loss_rank_avg": 0.34623590111732483, "step": 3580, "valid_targets_mean": 2907.1, "valid_targets_min": 2005 }, { "epoch": 5.736, "grad_norm": 0.7004852586309464, "learning_rate": 2.3828904698861565e-07, "loss": 0.3497, "loss_nan_ranks": 0, "loss_rank_avg": 0.36775827407836914, "step": 3585, "valid_targets_mean": 2347.5, "valid_targets_min": 1409 }, { "epoch": 5.744, "grad_norm": 0.660691313353332, "learning_rate": 2.2417694542836489e-07, "loss": 0.3454, "loss_nan_ranks": 0, "loss_rank_avg": 0.3477632403373718, "step": 3590, "valid_targets_mean": 2542.6, "valid_targets_min": 762 }, { "epoch": 5.752, "grad_norm": 0.6722150282665975, "learning_rate": 2.104932207085586e-07, "loss": 0.3588, "loss_nan_ranks": 0, "loss_rank_avg": 0.3427378535270691, "step": 3595, "valid_targets_mean": 2306.7, "valid_targets_min": 591 }, { "epoch": 5.76, "grad_norm": 0.6774361071713108, "learning_rate": 1.97238169241174e-07, "loss": 0.3484, "loss_nan_ranks": 0, "loss_rank_avg": 0.3895723223686218, "step": 3600, "valid_targets_mean": 2637.4, "valid_targets_min": 1795 }, { "epoch": 5.768, "grad_norm": 0.6995990010581683, "learning_rate": 1.8441207815241613e-07, "loss": 0.3611, "loss_nan_ranks": 0, "loss_rank_avg": 0.36859995126724243, "step": 3605, "valid_targets_mean": 2243.5, "valid_targets_min": 683 }, { "epoch": 5.776, "grad_norm": 0.7097143380434422, "learning_rate": 1.720152252765095e-07, "loss": 0.3538, "loss_nan_ranks": 0, "loss_rank_avg": 0.3496978282928467, "step": 3610, "valid_targets_mean": 2155.9, "valid_targets_min": 651 }, { "epoch": 5.784, "grad_norm": 0.5728311009707366, "learning_rate": 1.600478791496629e-07, "loss": 0.3453, "loss_nan_ranks": 0, "loss_rank_avg": 0.3100730776786804, "step": 3615, "valid_targets_mean": 2915.6, "valid_targets_min": 583 }, { "epoch": 5.792, "grad_norm": 0.6428640732598566, "learning_rate": 1.4851029900427415e-07, "loss": 0.3608, "loss_nan_ranks": 0, "loss_rank_avg": 0.3315027356147766, "step": 3620, "valid_targets_mean": 2660.2, "valid_targets_min": 814 }, { "epoch": 5.8, "grad_norm": 0.687739684791235, "learning_rate": 1.3740273476329224e-07, "loss": 0.3461, "loss_nan_ranks": 0, "loss_rank_avg": 0.3281402289867401, "step": 3625, "valid_targets_mean": 2156.2, "valid_targets_min": 926 }, { "epoch": 5.808, "grad_norm": 0.7237593508562562, "learning_rate": 1.2672542703482616e-07, "loss": 0.3524, "loss_nan_ranks": 0, "loss_rank_avg": 0.3774512708187103, "step": 3630, "valid_targets_mean": 2193.0, "valid_targets_min": 1095 }, { "epoch": 5.816, "grad_norm": 0.7248363306939806, "learning_rate": 1.164786071069135e-07, "loss": 0.3639, "loss_nan_ranks": 0, "loss_rank_avg": 0.3838192820549011, "step": 3635, "valid_targets_mean": 2081.1, "valid_targets_min": 370 }, { "epoch": 5.824, "grad_norm": 0.8579885115423562, "learning_rate": 1.0666249694251785e-07, "loss": 0.3602, "loss_nan_ranks": 0, "loss_rank_avg": 0.3642842769622803, "step": 3640, "valid_targets_mean": 2525.1, "valid_targets_min": 1365 }, { "epoch": 5.832, "grad_norm": 0.6333435755510938, "learning_rate": 9.72773091747281e-08, "loss": 0.3441, "loss_nan_ranks": 0, "loss_rank_avg": 0.3298102021217346, "step": 3645, "valid_targets_mean": 2519.0, "valid_targets_min": 1529 }, { "epoch": 5.84, "grad_norm": 0.6865034564959145, "learning_rate": 8.832324710214002e-08, "loss": 0.3694, "loss_nan_ranks": 0, "loss_rank_avg": 0.36878472566604614, "step": 3650, "valid_targets_mean": 2381.9, "valid_targets_min": 818 }, { "epoch": 5.848, "grad_norm": 0.7161601334807214, "learning_rate": 7.980050468445744e-08, "loss": 0.3464, "loss_nan_ranks": 0, "loss_rank_avg": 0.3624423146247864, "step": 3655, "valid_targets_mean": 2251.8, "valid_targets_min": 741 }, { "epoch": 5.856, "grad_norm": 0.7718758347904282, "learning_rate": 7.170926653829347e-08, "loss": 0.3615, "loss_nan_ranks": 0, "loss_rank_avg": 0.39595723152160645, "step": 3660, "valid_targets_mean": 1961.1, "valid_targets_min": 751 }, { "epoch": 5.864, "grad_norm": 0.7028502040729387, "learning_rate": 6.404970793317145e-08, "loss": 0.3606, "loss_nan_ranks": 0, "loss_rank_avg": 0.3521386384963989, "step": 3665, "valid_targets_mean": 2169.5, "valid_targets_min": 813 }, { "epoch": 5.872, "grad_norm": 0.6710893596193402, "learning_rate": 5.682199478772133e-08, "loss": 0.3531, "loss_nan_ranks": 0, "loss_rank_avg": 0.3567180335521698, "step": 3670, "valid_targets_mean": 2343.6, "valid_targets_min": 982 }, { "epoch": 5.88, "grad_norm": 0.6064785483244445, "learning_rate": 5.0026283666093635e-08, "loss": 0.3714, "loss_nan_ranks": 0, "loss_rank_avg": 0.3486100435256958, "step": 3675, "valid_targets_mean": 2649.5, "valid_targets_min": 1453 }, { "epoch": 5.888, "grad_norm": 0.6260330281616615, "learning_rate": 4.366272177456665e-08, "loss": 0.3654, "loss_nan_ranks": 0, "loss_rank_avg": 0.3249626159667969, "step": 3680, "valid_targets_mean": 2491.5, "valid_targets_min": 900 }, { "epoch": 5.896, "grad_norm": 0.5699534851889925, "learning_rate": 3.773144695834674e-08, "loss": 0.3353, "loss_nan_ranks": 0, "loss_rank_avg": 0.3237248361110687, "step": 3685, "valid_targets_mean": 3058.4, "valid_targets_min": 592 }, { "epoch": 5.904, "grad_norm": 0.6438269114276662, "learning_rate": 3.223258769860405e-08, "loss": 0.3618, "loss_nan_ranks": 0, "loss_rank_avg": 0.35090720653533936, "step": 3690, "valid_targets_mean": 2574.9, "valid_targets_min": 1240 }, { "epoch": 5.912, "grad_norm": 0.6683366651264305, "learning_rate": 2.716626310966808e-08, "loss": 0.3451, "loss_nan_ranks": 0, "loss_rank_avg": 0.34934568405151367, "step": 3695, "valid_targets_mean": 2282.0, "valid_targets_min": 644 }, { "epoch": 5.92, "grad_norm": 0.7329792310074869, "learning_rate": 2.253258293645866e-08, "loss": 0.3572, "loss_nan_ranks": 0, "loss_rank_avg": 0.3719084560871124, "step": 3700, "valid_targets_mean": 2026.5, "valid_targets_min": 697 }, { "epoch": 5.928, "grad_norm": 0.6920279565678626, "learning_rate": 1.8331647552110033e-08, "loss": 0.3535, "loss_nan_ranks": 0, "loss_rank_avg": 0.3860562741756439, "step": 3705, "valid_targets_mean": 2250.8, "valid_targets_min": 544 }, { "epoch": 5.936, "grad_norm": 0.6355753971291512, "learning_rate": 1.456354795578374e-08, "loss": 0.3528, "loss_nan_ranks": 0, "loss_rank_avg": 0.37555772066116333, "step": 3710, "valid_targets_mean": 2537.4, "valid_targets_min": 1178 }, { "epoch": 5.944, "grad_norm": 0.7374044889351038, "learning_rate": 1.1228365770714622e-08, "loss": 0.3565, "loss_nan_ranks": 0, "loss_rank_avg": 0.37717506289482117, "step": 3715, "valid_targets_mean": 2046.4, "valid_targets_min": 886 }, { "epoch": 5.952, "grad_norm": 0.6939052200451631, "learning_rate": 8.326173242432233e-09, "loss": 0.3657, "loss_nan_ranks": 0, "loss_rank_avg": 0.3981803059577942, "step": 3720, "valid_targets_mean": 2279.1, "valid_targets_min": 1434 }, { "epoch": 5.96, "grad_norm": 0.6916821499159527, "learning_rate": 5.857033237199883e-09, "loss": 0.3626, "loss_nan_ranks": 0, "loss_rank_avg": 0.36604171991348267, "step": 3725, "valid_targets_mean": 2196.9, "valid_targets_min": 567 }, { "epoch": 5.968, "grad_norm": 0.816549828786453, "learning_rate": 3.820999240644608e-09, "loss": 0.364, "loss_nan_ranks": 0, "loss_rank_avg": 0.3439550995826721, "step": 3730, "valid_targets_mean": 2641.3, "valid_targets_min": 677 }, { "epoch": 5.976, "grad_norm": 0.6998780250505308, "learning_rate": 2.2181153566158687e-09, "loss": 0.3538, "loss_nan_ranks": 0, "loss_rank_avg": 0.34991949796676636, "step": 3735, "valid_targets_mean": 2220.7, "valid_targets_min": 638 }, { "epoch": 5.984, "grad_norm": 0.6161285767403547, "learning_rate": 1.0484163062107755e-09, "loss": 0.3444, "loss_nan_ranks": 0, "loss_rank_avg": 0.32655662298202515, "step": 3740, "valid_targets_mean": 2840.2, "valid_targets_min": 745 }, { "epoch": 5.992, "grad_norm": 0.7847617955919047, "learning_rate": 3.11927427034675e-10, "loss": 0.3543, "loss_nan_ranks": 0, "loss_rank_avg": 0.4086189270019531, "step": 3745, "valid_targets_mean": 1846.4, "valid_targets_min": 674 }, { "epoch": 6.0, "grad_norm": 0.7040092695127196, "learning_rate": 8.664672648261985e-12, "loss": 0.3618, "loss_nan_ranks": 0, "loss_rank_avg": 0.36595475673675537, "step": 3750, "valid_targets_mean": 2265.2, "valid_targets_min": 678 }, { "epoch": 6.0, "loss_nan_ranks": 0, "loss_rank_avg": 0.36595475673675537, "step": 3750, "total_flos": 645391703212032.0, "train_loss": 0.40567625595728557, "train_runtime": 15381.7515, "train_samples_per_second": 3.9, "train_steps_per_second": 0.244, "valid_targets_mean": 2265.2, "valid_targets_min": 678 } ], "logging_steps": 5, "max_steps": 3750, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 645391703212032.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }