| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 6.0, | |
| "eval_steps": 500, | |
| "global_step": 3750, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.008, | |
| "grad_norm": 1.1123269235502846, | |
| "learning_rate": 4.266666666666667e-07, | |
| "loss": 0.5305, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5401487350463867, | |
| "step": 5, | |
| "valid_targets_mean": 2568.2, | |
| "valid_targets_min": 1832 | |
| }, | |
| { | |
| "epoch": 0.016, | |
| "grad_norm": 1.0307736286257203, | |
| "learning_rate": 9.600000000000001e-07, | |
| "loss": 0.5413, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.518206000328064, | |
| "step": 10, | |
| "valid_targets_mean": 2551.6, | |
| "valid_targets_min": 594 | |
| }, | |
| { | |
| "epoch": 0.024, | |
| "grad_norm": 1.1513067813597742, | |
| "learning_rate": 1.4933333333333336e-06, | |
| "loss": 0.5505, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5674519538879395, | |
| "step": 15, | |
| "valid_targets_mean": 2249.8, | |
| "valid_targets_min": 1202 | |
| }, | |
| { | |
| "epoch": 0.032, | |
| "grad_norm": 1.0885622535090271, | |
| "learning_rate": 2.0266666666666666e-06, | |
| "loss": 0.5291, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.540584921836853, | |
| "step": 20, | |
| "valid_targets_mean": 2476.6, | |
| "valid_targets_min": 1301 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 0.9309407859608781, | |
| "learning_rate": 2.56e-06, | |
| "loss": 0.5469, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5405070185661316, | |
| "step": 25, | |
| "valid_targets_mean": 2325.6, | |
| "valid_targets_min": 560 | |
| }, | |
| { | |
| "epoch": 0.048, | |
| "grad_norm": 0.8653574810736101, | |
| "learning_rate": 3.093333333333334e-06, | |
| "loss": 0.5578, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5472347140312195, | |
| "step": 30, | |
| "valid_targets_mean": 2225.7, | |
| "valid_targets_min": 689 | |
| }, | |
| { | |
| "epoch": 0.056, | |
| "grad_norm": 0.7895777368811453, | |
| "learning_rate": 3.6266666666666674e-06, | |
| "loss": 0.5594, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5670331716537476, | |
| "step": 35, | |
| "valid_targets_mean": 2279.0, | |
| "valid_targets_min": 643 | |
| }, | |
| { | |
| "epoch": 0.064, | |
| "grad_norm": 0.7760634586844122, | |
| "learning_rate": 4.16e-06, | |
| "loss": 0.526, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5177326202392578, | |
| "step": 40, | |
| "valid_targets_mean": 2332.7, | |
| "valid_targets_min": 1072 | |
| }, | |
| { | |
| "epoch": 0.072, | |
| "grad_norm": 0.6939896610202841, | |
| "learning_rate": 4.693333333333334e-06, | |
| "loss": 0.5419, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.522503137588501, | |
| "step": 45, | |
| "valid_targets_mean": 2649.1, | |
| "valid_targets_min": 1594 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 0.6701763260463834, | |
| "learning_rate": 5.226666666666667e-06, | |
| "loss": 0.51, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4922634959220886, | |
| "step": 50, | |
| "valid_targets_mean": 2683.0, | |
| "valid_targets_min": 1193 | |
| }, | |
| { | |
| "epoch": 0.088, | |
| "grad_norm": 0.8048346477254813, | |
| "learning_rate": 5.76e-06, | |
| "loss": 0.5208, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5240609049797058, | |
| "step": 55, | |
| "valid_targets_mean": 2272.8, | |
| "valid_targets_min": 786 | |
| }, | |
| { | |
| "epoch": 0.096, | |
| "grad_norm": 0.65782981166933, | |
| "learning_rate": 6.293333333333334e-06, | |
| "loss": 0.5392, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5241539478302002, | |
| "step": 60, | |
| "valid_targets_mean": 2635.2, | |
| "valid_targets_min": 2017 | |
| }, | |
| { | |
| "epoch": 0.104, | |
| "grad_norm": 0.8692611396107511, | |
| "learning_rate": 6.826666666666667e-06, | |
| "loss": 0.5202, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5587050914764404, | |
| "step": 65, | |
| "valid_targets_mean": 2095.1, | |
| "valid_targets_min": 642 | |
| }, | |
| { | |
| "epoch": 0.112, | |
| "grad_norm": 0.7327308095270739, | |
| "learning_rate": 7.360000000000001e-06, | |
| "loss": 0.5126, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5429940819740295, | |
| "step": 70, | |
| "valid_targets_mean": 2413.7, | |
| "valid_targets_min": 809 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 0.7114841665616957, | |
| "learning_rate": 7.893333333333335e-06, | |
| "loss": 0.5388, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5138970017433167, | |
| "step": 75, | |
| "valid_targets_mean": 2285.8, | |
| "valid_targets_min": 654 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "grad_norm": 0.7450057042477, | |
| "learning_rate": 8.426666666666667e-06, | |
| "loss": 0.5241, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.522254228591919, | |
| "step": 80, | |
| "valid_targets_mean": 2392.8, | |
| "valid_targets_min": 834 | |
| }, | |
| { | |
| "epoch": 0.136, | |
| "grad_norm": 0.6646870780143527, | |
| "learning_rate": 8.96e-06, | |
| "loss": 0.5321, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4835280776023865, | |
| "step": 85, | |
| "valid_targets_mean": 2408.1, | |
| "valid_targets_min": 863 | |
| }, | |
| { | |
| "epoch": 0.144, | |
| "grad_norm": 0.7685295225223336, | |
| "learning_rate": 9.493333333333334e-06, | |
| "loss": 0.4985, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5568891763687134, | |
| "step": 90, | |
| "valid_targets_mean": 2293.4, | |
| "valid_targets_min": 863 | |
| }, | |
| { | |
| "epoch": 0.152, | |
| "grad_norm": 0.757668913170068, | |
| "learning_rate": 1.0026666666666667e-05, | |
| "loss": 0.489, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.49190622568130493, | |
| "step": 95, | |
| "valid_targets_mean": 1997.3, | |
| "valid_targets_min": 598 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 0.8812780405653656, | |
| "learning_rate": 1.056e-05, | |
| "loss": 0.5115, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5440094470977783, | |
| "step": 100, | |
| "valid_targets_mean": 2036.3, | |
| "valid_targets_min": 627 | |
| }, | |
| { | |
| "epoch": 0.168, | |
| "grad_norm": 0.7100510683842957, | |
| "learning_rate": 1.1093333333333334e-05, | |
| "loss": 0.5021, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4753778576850891, | |
| "step": 105, | |
| "valid_targets_mean": 2323.8, | |
| "valid_targets_min": 591 | |
| }, | |
| { | |
| "epoch": 0.176, | |
| "grad_norm": 0.6512988020949609, | |
| "learning_rate": 1.1626666666666668e-05, | |
| "loss": 0.5164, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.49330976605415344, | |
| "step": 110, | |
| "valid_targets_mean": 2537.6, | |
| "valid_targets_min": 581 | |
| }, | |
| { | |
| "epoch": 0.184, | |
| "grad_norm": 0.7639238062022932, | |
| "learning_rate": 1.216e-05, | |
| "loss": 0.5252, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5586614608764648, | |
| "step": 115, | |
| "valid_targets_mean": 2337.1, | |
| "valid_targets_min": 882 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "grad_norm": 0.7575138893965674, | |
| "learning_rate": 1.2693333333333336e-05, | |
| "loss": 0.5178, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5260697603225708, | |
| "step": 120, | |
| "valid_targets_mean": 2170.1, | |
| "valid_targets_min": 695 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 0.702449713567109, | |
| "learning_rate": 1.3226666666666668e-05, | |
| "loss": 0.5188, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5398459434509277, | |
| "step": 125, | |
| "valid_targets_mean": 2577.2, | |
| "valid_targets_min": 629 | |
| }, | |
| { | |
| "epoch": 0.208, | |
| "grad_norm": 0.7808788433928092, | |
| "learning_rate": 1.376e-05, | |
| "loss": 0.5113, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5482888221740723, | |
| "step": 130, | |
| "valid_targets_mean": 1933.8, | |
| "valid_targets_min": 852 | |
| }, | |
| { | |
| "epoch": 0.216, | |
| "grad_norm": 0.7056899770108914, | |
| "learning_rate": 1.4293333333333334e-05, | |
| "loss": 0.4977, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4935382008552551, | |
| "step": 135, | |
| "valid_targets_mean": 2204.5, | |
| "valid_targets_min": 584 | |
| }, | |
| { | |
| "epoch": 0.224, | |
| "grad_norm": 0.7966879290545987, | |
| "learning_rate": 1.4826666666666666e-05, | |
| "loss": 0.4848, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5101238489151001, | |
| "step": 140, | |
| "valid_targets_mean": 2561.9, | |
| "valid_targets_min": 1349 | |
| }, | |
| { | |
| "epoch": 0.232, | |
| "grad_norm": 0.7943913824299708, | |
| "learning_rate": 1.5360000000000002e-05, | |
| "loss": 0.5185, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5645924806594849, | |
| "step": 145, | |
| "valid_targets_mean": 1990.6, | |
| "valid_targets_min": 578 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 1.0993272339263302, | |
| "learning_rate": 1.5893333333333333e-05, | |
| "loss": 0.5205, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5444607734680176, | |
| "step": 150, | |
| "valid_targets_mean": 2103.9, | |
| "valid_targets_min": 522 | |
| }, | |
| { | |
| "epoch": 0.248, | |
| "grad_norm": 0.7214471657235726, | |
| "learning_rate": 1.642666666666667e-05, | |
| "loss": 0.4914, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.48658621311187744, | |
| "step": 155, | |
| "valid_targets_mean": 2252.8, | |
| "valid_targets_min": 506 | |
| }, | |
| { | |
| "epoch": 0.256, | |
| "grad_norm": 0.647860810097823, | |
| "learning_rate": 1.696e-05, | |
| "loss": 0.4897, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4415959417819977, | |
| "step": 160, | |
| "valid_targets_mean": 2587.4, | |
| "valid_targets_min": 1156 | |
| }, | |
| { | |
| "epoch": 0.264, | |
| "grad_norm": 0.7996121351509609, | |
| "learning_rate": 1.7493333333333334e-05, | |
| "loss": 0.4974, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5080252885818481, | |
| "step": 165, | |
| "valid_targets_mean": 2078.8, | |
| "valid_targets_min": 894 | |
| }, | |
| { | |
| "epoch": 0.272, | |
| "grad_norm": 0.7839710134675049, | |
| "learning_rate": 1.8026666666666668e-05, | |
| "loss": 0.5054, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5535873174667358, | |
| "step": 170, | |
| "valid_targets_mean": 2108.3, | |
| "valid_targets_min": 956 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 0.7271134989631454, | |
| "learning_rate": 1.8560000000000002e-05, | |
| "loss": 0.4994, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5392011404037476, | |
| "step": 175, | |
| "valid_targets_mean": 2380.4, | |
| "valid_targets_min": 523 | |
| }, | |
| { | |
| "epoch": 0.288, | |
| "grad_norm": 0.7438261523269221, | |
| "learning_rate": 1.9093333333333336e-05, | |
| "loss": 0.4563, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4989452362060547, | |
| "step": 180, | |
| "valid_targets_mean": 2122.2, | |
| "valid_targets_min": 505 | |
| }, | |
| { | |
| "epoch": 0.296, | |
| "grad_norm": 0.7087097494439891, | |
| "learning_rate": 1.9626666666666666e-05, | |
| "loss": 0.4907, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.440179705619812, | |
| "step": 185, | |
| "valid_targets_mean": 2344.2, | |
| "valid_targets_min": 677 | |
| }, | |
| { | |
| "epoch": 0.304, | |
| "grad_norm": 0.7922339174945309, | |
| "learning_rate": 2.016e-05, | |
| "loss": 0.5109, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5441197752952576, | |
| "step": 190, | |
| "valid_targets_mean": 2192.1, | |
| "valid_targets_min": 674 | |
| }, | |
| { | |
| "epoch": 0.312, | |
| "grad_norm": 0.8719375736319943, | |
| "learning_rate": 2.0693333333333334e-05, | |
| "loss": 0.4719, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4866606593132019, | |
| "step": 195, | |
| "valid_targets_mean": 2093.7, | |
| "valid_targets_min": 720 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 0.7147407436139384, | |
| "learning_rate": 2.1226666666666668e-05, | |
| "loss": 0.5033, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.48567086458206177, | |
| "step": 200, | |
| "valid_targets_mean": 2235.4, | |
| "valid_targets_min": 891 | |
| }, | |
| { | |
| "epoch": 0.328, | |
| "grad_norm": 0.6645224606175668, | |
| "learning_rate": 2.1760000000000002e-05, | |
| "loss": 0.4713, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45803600549697876, | |
| "step": 205, | |
| "valid_targets_mean": 2461.2, | |
| "valid_targets_min": 767 | |
| }, | |
| { | |
| "epoch": 0.336, | |
| "grad_norm": 0.6835201565681405, | |
| "learning_rate": 2.2293333333333336e-05, | |
| "loss": 0.4826, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.49388882517814636, | |
| "step": 210, | |
| "valid_targets_mean": 2384.2, | |
| "valid_targets_min": 741 | |
| }, | |
| { | |
| "epoch": 0.344, | |
| "grad_norm": 0.7571298087512041, | |
| "learning_rate": 2.282666666666667e-05, | |
| "loss": 0.5161, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5247617959976196, | |
| "step": 215, | |
| "valid_targets_mean": 2137.7, | |
| "valid_targets_min": 803 | |
| }, | |
| { | |
| "epoch": 0.352, | |
| "grad_norm": 0.7493305637197739, | |
| "learning_rate": 2.336e-05, | |
| "loss": 0.4818, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4521583914756775, | |
| "step": 220, | |
| "valid_targets_mean": 2493.7, | |
| "valid_targets_min": 1072 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 0.6302236122957066, | |
| "learning_rate": 2.3893333333333337e-05, | |
| "loss": 0.4597, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4275933504104614, | |
| "step": 225, | |
| "valid_targets_mean": 2858.3, | |
| "valid_targets_min": 1495 | |
| }, | |
| { | |
| "epoch": 0.368, | |
| "grad_norm": 0.7547426741735809, | |
| "learning_rate": 2.442666666666667e-05, | |
| "loss": 0.5046, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.49721017479896545, | |
| "step": 230, | |
| "valid_targets_mean": 2059.5, | |
| "valid_targets_min": 804 | |
| }, | |
| { | |
| "epoch": 0.376, | |
| "grad_norm": 0.8593657667258695, | |
| "learning_rate": 2.496e-05, | |
| "loss": 0.4785, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5257455110549927, | |
| "step": 235, | |
| "valid_targets_mean": 2286.9, | |
| "valid_targets_min": 1010 | |
| }, | |
| { | |
| "epoch": 0.384, | |
| "grad_norm": 0.6840458587232336, | |
| "learning_rate": 2.5493333333333335e-05, | |
| "loss": 0.478, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.47672101855278015, | |
| "step": 240, | |
| "valid_targets_mean": 2516.1, | |
| "valid_targets_min": 748 | |
| }, | |
| { | |
| "epoch": 0.392, | |
| "grad_norm": 0.7701222695014215, | |
| "learning_rate": 2.6026666666666666e-05, | |
| "loss": 0.5012, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5424163341522217, | |
| "step": 245, | |
| "valid_targets_mean": 2096.6, | |
| "valid_targets_min": 938 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.8091486942145641, | |
| "learning_rate": 2.6560000000000003e-05, | |
| "loss": 0.4936, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5072291493415833, | |
| "step": 250, | |
| "valid_targets_mean": 2084.3, | |
| "valid_targets_min": 588 | |
| }, | |
| { | |
| "epoch": 0.408, | |
| "grad_norm": 0.6898982048013277, | |
| "learning_rate": 2.7093333333333337e-05, | |
| "loss": 0.4968, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4925045967102051, | |
| "step": 255, | |
| "valid_targets_mean": 2633.6, | |
| "valid_targets_min": 777 | |
| }, | |
| { | |
| "epoch": 0.416, | |
| "grad_norm": 0.8251422552999986, | |
| "learning_rate": 2.7626666666666668e-05, | |
| "loss": 0.5298, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5059669017791748, | |
| "step": 260, | |
| "valid_targets_mean": 2005.4, | |
| "valid_targets_min": 666 | |
| }, | |
| { | |
| "epoch": 0.424, | |
| "grad_norm": 0.7431003149844874, | |
| "learning_rate": 2.816e-05, | |
| "loss": 0.4875, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5014691352844238, | |
| "step": 265, | |
| "valid_targets_mean": 2241.7, | |
| "valid_targets_min": 1208 | |
| }, | |
| { | |
| "epoch": 0.432, | |
| "grad_norm": 0.6125432327307639, | |
| "learning_rate": 2.869333333333334e-05, | |
| "loss": 0.4721, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4296124577522278, | |
| "step": 270, | |
| "valid_targets_mean": 2600.2, | |
| "valid_targets_min": 608 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 0.8330366261881735, | |
| "learning_rate": 2.922666666666667e-05, | |
| "loss": 0.4906, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5452744960784912, | |
| "step": 275, | |
| "valid_targets_mean": 2064.5, | |
| "valid_targets_min": 641 | |
| }, | |
| { | |
| "epoch": 0.448, | |
| "grad_norm": 0.7334594138189166, | |
| "learning_rate": 2.9760000000000003e-05, | |
| "loss": 0.4781, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4792283773422241, | |
| "step": 280, | |
| "valid_targets_mean": 2248.9, | |
| "valid_targets_min": 770 | |
| }, | |
| { | |
| "epoch": 0.456, | |
| "grad_norm": 0.7048066084402315, | |
| "learning_rate": 3.0293333333333334e-05, | |
| "loss": 0.5071, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4940047264099121, | |
| "step": 285, | |
| "valid_targets_mean": 2448.4, | |
| "valid_targets_min": 718 | |
| }, | |
| { | |
| "epoch": 0.464, | |
| "grad_norm": 0.7321813936089969, | |
| "learning_rate": 3.0826666666666674e-05, | |
| "loss": 0.4861, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4731312096118927, | |
| "step": 290, | |
| "valid_targets_mean": 2213.3, | |
| "valid_targets_min": 614 | |
| }, | |
| { | |
| "epoch": 0.472, | |
| "grad_norm": 0.7327861766468036, | |
| "learning_rate": 3.1360000000000005e-05, | |
| "loss": 0.4978, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4812717139720917, | |
| "step": 295, | |
| "valid_targets_mean": 2238.8, | |
| "valid_targets_min": 612 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 0.6978085848971363, | |
| "learning_rate": 3.1893333333333335e-05, | |
| "loss": 0.4803, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.46808069944381714, | |
| "step": 300, | |
| "valid_targets_mean": 2457.3, | |
| "valid_targets_min": 442 | |
| }, | |
| { | |
| "epoch": 0.488, | |
| "grad_norm": 0.8111913410877619, | |
| "learning_rate": 3.2426666666666666e-05, | |
| "loss": 0.479, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44130653142929077, | |
| "step": 305, | |
| "valid_targets_mean": 2637.6, | |
| "valid_targets_min": 1587 | |
| }, | |
| { | |
| "epoch": 0.496, | |
| "grad_norm": 0.7458942349832357, | |
| "learning_rate": 3.296e-05, | |
| "loss": 0.4808, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.49815142154693604, | |
| "step": 310, | |
| "valid_targets_mean": 2545.6, | |
| "valid_targets_min": 869 | |
| }, | |
| { | |
| "epoch": 0.504, | |
| "grad_norm": 0.7385285311519089, | |
| "learning_rate": 3.349333333333334e-05, | |
| "loss": 0.4661, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5133688449859619, | |
| "step": 315, | |
| "valid_targets_mean": 2280.2, | |
| "valid_targets_min": 386 | |
| }, | |
| { | |
| "epoch": 0.512, | |
| "grad_norm": 0.7586069534178319, | |
| "learning_rate": 3.402666666666667e-05, | |
| "loss": 0.4886, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4860743284225464, | |
| "step": 320, | |
| "valid_targets_mean": 2133.5, | |
| "valid_targets_min": 712 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 0.6422577702781003, | |
| "learning_rate": 3.456e-05, | |
| "loss": 0.4812, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4424203634262085, | |
| "step": 325, | |
| "valid_targets_mean": 2430.3, | |
| "valid_targets_min": 978 | |
| }, | |
| { | |
| "epoch": 0.528, | |
| "grad_norm": 0.6745500124718121, | |
| "learning_rate": 3.509333333333333e-05, | |
| "loss": 0.4766, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.49251464009284973, | |
| "step": 330, | |
| "valid_targets_mean": 2522.8, | |
| "valid_targets_min": 820 | |
| }, | |
| { | |
| "epoch": 0.536, | |
| "grad_norm": 0.6458982073475765, | |
| "learning_rate": 3.562666666666667e-05, | |
| "loss": 0.475, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4584057331085205, | |
| "step": 335, | |
| "valid_targets_mean": 2477.6, | |
| "valid_targets_min": 969 | |
| }, | |
| { | |
| "epoch": 0.544, | |
| "grad_norm": 0.6962215050016953, | |
| "learning_rate": 3.6160000000000006e-05, | |
| "loss": 0.4734, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4557253122329712, | |
| "step": 340, | |
| "valid_targets_mean": 2211.9, | |
| "valid_targets_min": 659 | |
| }, | |
| { | |
| "epoch": 0.552, | |
| "grad_norm": 0.6214790557238985, | |
| "learning_rate": 3.669333333333334e-05, | |
| "loss": 0.4598, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.439766526222229, | |
| "step": 345, | |
| "valid_targets_mean": 2683.5, | |
| "valid_targets_min": 1118 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 0.6918806504772981, | |
| "learning_rate": 3.722666666666667e-05, | |
| "loss": 0.4765, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.48400256037712097, | |
| "step": 350, | |
| "valid_targets_mean": 2405.7, | |
| "valid_targets_min": 1346 | |
| }, | |
| { | |
| "epoch": 0.568, | |
| "grad_norm": 0.700219210449274, | |
| "learning_rate": 3.7760000000000004e-05, | |
| "loss": 0.4668, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.48044857382774353, | |
| "step": 355, | |
| "valid_targets_mean": 2154.9, | |
| "valid_targets_min": 616 | |
| }, | |
| { | |
| "epoch": 0.576, | |
| "grad_norm": 0.7550351146312819, | |
| "learning_rate": 3.8293333333333335e-05, | |
| "loss": 0.4845, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4983769655227661, | |
| "step": 360, | |
| "valid_targets_mean": 1993.4, | |
| "valid_targets_min": 670 | |
| }, | |
| { | |
| "epoch": 0.584, | |
| "grad_norm": 0.8289049493344646, | |
| "learning_rate": 3.882666666666667e-05, | |
| "loss": 0.4842, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5466411113739014, | |
| "step": 365, | |
| "valid_targets_mean": 2076.8, | |
| "valid_targets_min": 698 | |
| }, | |
| { | |
| "epoch": 0.592, | |
| "grad_norm": 0.6624910703185226, | |
| "learning_rate": 3.936e-05, | |
| "loss": 0.5011, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.482534259557724, | |
| "step": 370, | |
| "valid_targets_mean": 2544.8, | |
| "valid_targets_min": 1580 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 0.782517624818849, | |
| "learning_rate": 3.989333333333333e-05, | |
| "loss": 0.4851, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.47620609402656555, | |
| "step": 375, | |
| "valid_targets_mean": 1883.6, | |
| "valid_targets_min": 685 | |
| }, | |
| { | |
| "epoch": 0.608, | |
| "grad_norm": 0.6417723288508768, | |
| "learning_rate": 3.9999861365387784e-05, | |
| "loss": 0.4697, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4307625889778137, | |
| "step": 380, | |
| "valid_targets_mean": 2684.2, | |
| "valid_targets_min": 974 | |
| }, | |
| { | |
| "epoch": 0.616, | |
| "grad_norm": 0.6498871680670533, | |
| "learning_rate": 3.9999298165569614e-05, | |
| "loss": 0.4618, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4357556402683258, | |
| "step": 385, | |
| "valid_targets_mean": 2512.1, | |
| "valid_targets_min": 879 | |
| }, | |
| { | |
| "epoch": 0.624, | |
| "grad_norm": 0.699782882602203, | |
| "learning_rate": 3.999830174807269e-05, | |
| "loss": 0.4722, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.47853145003318787, | |
| "step": 390, | |
| "valid_targets_mean": 2307.6, | |
| "valid_targets_min": 846 | |
| }, | |
| { | |
| "epoch": 0.632, | |
| "grad_norm": 0.6566001605088011, | |
| "learning_rate": 3.9996872134481036e-05, | |
| "loss": 0.4579, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.472840815782547, | |
| "step": 395, | |
| "valid_targets_mean": 2387.8, | |
| "valid_targets_min": 764 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 0.8011644192046202, | |
| "learning_rate": 3.999500935576245e-05, | |
| "loss": 0.4831, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.47220632433891296, | |
| "step": 400, | |
| "valid_targets_mean": 2383.8, | |
| "valid_targets_min": 886 | |
| }, | |
| { | |
| "epoch": 0.648, | |
| "grad_norm": 0.7219930471596472, | |
| "learning_rate": 3.999271345226776e-05, | |
| "loss": 0.4846, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.510077178478241, | |
| "step": 405, | |
| "valid_targets_mean": 2494.2, | |
| "valid_targets_min": 951 | |
| }, | |
| { | |
| "epoch": 0.656, | |
| "grad_norm": 0.6472532682023311, | |
| "learning_rate": 3.9989984473730035e-05, | |
| "loss": 0.4598, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4869913160800934, | |
| "step": 410, | |
| "valid_targets_mean": 2634.6, | |
| "valid_targets_min": 826 | |
| }, | |
| { | |
| "epoch": 0.664, | |
| "grad_norm": 0.626388386803493, | |
| "learning_rate": 3.998682247926343e-05, | |
| "loss": 0.458, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45631903409957886, | |
| "step": 415, | |
| "valid_targets_mean": 2277.9, | |
| "valid_targets_min": 396 | |
| }, | |
| { | |
| "epoch": 0.672, | |
| "grad_norm": 0.6114116058267862, | |
| "learning_rate": 3.998322753736193e-05, | |
| "loss": 0.4832, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.47891151905059814, | |
| "step": 420, | |
| "valid_targets_mean": 2695.1, | |
| "valid_targets_min": 760 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 0.6591550740243267, | |
| "learning_rate": 3.99791997258979e-05, | |
| "loss": 0.4783, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45776450634002686, | |
| "step": 425, | |
| "valid_targets_mean": 2526.2, | |
| "valid_targets_min": 900 | |
| }, | |
| { | |
| "epoch": 0.688, | |
| "grad_norm": 0.6663403401337069, | |
| "learning_rate": 3.997473913212036e-05, | |
| "loss": 0.4678, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.48343199491500854, | |
| "step": 430, | |
| "valid_targets_mean": 2378.8, | |
| "valid_targets_min": 795 | |
| }, | |
| { | |
| "epoch": 0.696, | |
| "grad_norm": 0.6670143387646009, | |
| "learning_rate": 3.9969845852653087e-05, | |
| "loss": 0.478, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45757466554641724, | |
| "step": 435, | |
| "valid_targets_mean": 2522.7, | |
| "valid_targets_min": 1505 | |
| }, | |
| { | |
| "epoch": 0.704, | |
| "grad_norm": 0.58199200233584, | |
| "learning_rate": 3.996451999349258e-05, | |
| "loss": 0.4386, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42813169956207275, | |
| "step": 440, | |
| "valid_targets_mean": 2870.2, | |
| "valid_targets_min": 1437 | |
| }, | |
| { | |
| "epoch": 0.712, | |
| "grad_norm": 0.6752845213387725, | |
| "learning_rate": 3.995876167000569e-05, | |
| "loss": 0.4821, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.47400549054145813, | |
| "step": 445, | |
| "valid_targets_mean": 2750.9, | |
| "valid_targets_min": 944 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 0.667561633288874, | |
| "learning_rate": 3.9952571006927186e-05, | |
| "loss": 0.4551, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.48587462306022644, | |
| "step": 450, | |
| "valid_targets_mean": 2546.8, | |
| "valid_targets_min": 719 | |
| }, | |
| { | |
| "epoch": 0.728, | |
| "grad_norm": 0.6724373683462508, | |
| "learning_rate": 3.9945948138356995e-05, | |
| "loss": 0.4661, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4643342196941376, | |
| "step": 455, | |
| "valid_targets_mean": 2515.9, | |
| "valid_targets_min": 1330 | |
| }, | |
| { | |
| "epoch": 0.736, | |
| "grad_norm": 0.7049819458106401, | |
| "learning_rate": 3.993889320775735e-05, | |
| "loss": 0.4656, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4860385060310364, | |
| "step": 460, | |
| "valid_targets_mean": 2231.8, | |
| "valid_targets_min": 576 | |
| }, | |
| { | |
| "epoch": 0.744, | |
| "grad_norm": 0.6400462079946704, | |
| "learning_rate": 3.9931406367949627e-05, | |
| "loss": 0.4794, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4631817042827606, | |
| "step": 465, | |
| "valid_targets_mean": 2380.6, | |
| "valid_targets_min": 667 | |
| }, | |
| { | |
| "epoch": 0.752, | |
| "grad_norm": 0.8286441176655512, | |
| "learning_rate": 3.9923487781111106e-05, | |
| "loss": 0.4646, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4641321003437042, | |
| "step": 470, | |
| "valid_targets_mean": 2411.0, | |
| "valid_targets_min": 749 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 0.7068889030236437, | |
| "learning_rate": 3.9915137618771386e-05, | |
| "loss": 0.4751, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.46361860632896423, | |
| "step": 475, | |
| "valid_targets_mean": 2641.8, | |
| "valid_targets_min": 751 | |
| }, | |
| { | |
| "epoch": 0.768, | |
| "grad_norm": 0.6181802936183315, | |
| "learning_rate": 3.9906356061808713e-05, | |
| "loss": 0.4556, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.47535887360572815, | |
| "step": 480, | |
| "valid_targets_mean": 2579.4, | |
| "valid_targets_min": 1069 | |
| }, | |
| { | |
| "epoch": 0.776, | |
| "grad_norm": 0.754698609738125, | |
| "learning_rate": 3.9897143300446055e-05, | |
| "loss": 0.4762, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.49311938881874084, | |
| "step": 485, | |
| "valid_targets_mean": 2111.9, | |
| "valid_targets_min": 417 | |
| }, | |
| { | |
| "epoch": 0.784, | |
| "grad_norm": 0.679634462005907, | |
| "learning_rate": 3.988749953424696e-05, | |
| "loss": 0.4786, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.46043074131011963, | |
| "step": 490, | |
| "valid_targets_mean": 2328.2, | |
| "valid_targets_min": 615 | |
| }, | |
| { | |
| "epoch": 0.792, | |
| "grad_norm": 0.815808151327097, | |
| "learning_rate": 3.9877424972111264e-05, | |
| "loss": 0.4692, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5270492434501648, | |
| "step": 495, | |
| "valid_targets_mean": 2088.6, | |
| "valid_targets_min": 798 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.5897123286869944, | |
| "learning_rate": 3.9866919832270554e-05, | |
| "loss": 0.4608, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4187796711921692, | |
| "step": 500, | |
| "valid_targets_mean": 2436.2, | |
| "valid_targets_min": 1019 | |
| }, | |
| { | |
| "epoch": 0.808, | |
| "grad_norm": 0.7432501071695375, | |
| "learning_rate": 3.9855984342283414e-05, | |
| "loss": 0.4667, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4671873450279236, | |
| "step": 505, | |
| "valid_targets_mean": 1886.4, | |
| "valid_targets_min": 681 | |
| }, | |
| { | |
| "epoch": 0.816, | |
| "grad_norm": 0.6216648090898564, | |
| "learning_rate": 3.9844618739030545e-05, | |
| "loss": 0.4729, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4360358715057373, | |
| "step": 510, | |
| "valid_targets_mean": 2900.6, | |
| "valid_targets_min": 684 | |
| }, | |
| { | |
| "epoch": 0.824, | |
| "grad_norm": 0.6939020046248706, | |
| "learning_rate": 3.98328232687096e-05, | |
| "loss": 0.4739, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.49482274055480957, | |
| "step": 515, | |
| "valid_targets_mean": 2357.6, | |
| "valid_targets_min": 590 | |
| }, | |
| { | |
| "epoch": 0.832, | |
| "grad_norm": 0.721859248226527, | |
| "learning_rate": 3.982059818682986e-05, | |
| "loss": 0.5004, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5051529407501221, | |
| "step": 520, | |
| "valid_targets_mean": 2156.8, | |
| "valid_targets_min": 687 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 0.6494270332660046, | |
| "learning_rate": 3.980794375820669e-05, | |
| "loss": 0.4499, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4555926024913788, | |
| "step": 525, | |
| "valid_targets_mean": 2189.4, | |
| "valid_targets_min": 674 | |
| }, | |
| { | |
| "epoch": 0.848, | |
| "grad_norm": 0.7317549355927052, | |
| "learning_rate": 3.9794860256955825e-05, | |
| "loss": 0.4758, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.48144295811653137, | |
| "step": 530, | |
| "valid_targets_mean": 1934.8, | |
| "valid_targets_min": 777 | |
| }, | |
| { | |
| "epoch": 0.856, | |
| "grad_norm": 0.6819599897787019, | |
| "learning_rate": 3.9781347966487415e-05, | |
| "loss": 0.4861, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.49034303426742554, | |
| "step": 535, | |
| "valid_targets_mean": 2477.9, | |
| "valid_targets_min": 615 | |
| }, | |
| { | |
| "epoch": 0.864, | |
| "grad_norm": 0.6893642280639611, | |
| "learning_rate": 3.9767407179499875e-05, | |
| "loss": 0.4751, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.481620728969574, | |
| "step": 540, | |
| "valid_targets_mean": 2269.1, | |
| "valid_targets_min": 496 | |
| }, | |
| { | |
| "epoch": 0.872, | |
| "grad_norm": 0.731052766030611, | |
| "learning_rate": 3.975303819797358e-05, | |
| "loss": 0.4605, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4804762601852417, | |
| "step": 545, | |
| "valid_targets_mean": 2014.6, | |
| "valid_targets_min": 825 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 0.6245922571157282, | |
| "learning_rate": 3.973824133316431e-05, | |
| "loss": 0.4514, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4820593297481537, | |
| "step": 550, | |
| "valid_targets_mean": 2448.4, | |
| "valid_targets_min": 895 | |
| }, | |
| { | |
| "epoch": 0.888, | |
| "grad_norm": 0.6626544949374586, | |
| "learning_rate": 3.972301690559645e-05, | |
| "loss": 0.4512, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4780968129634857, | |
| "step": 555, | |
| "valid_targets_mean": 2211.1, | |
| "valid_targets_min": 1262 | |
| }, | |
| { | |
| "epoch": 0.896, | |
| "grad_norm": 0.6705544140440111, | |
| "learning_rate": 3.970736524505615e-05, | |
| "loss": 0.4664, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4690876603126526, | |
| "step": 560, | |
| "valid_targets_mean": 2282.8, | |
| "valid_targets_min": 729 | |
| }, | |
| { | |
| "epoch": 0.904, | |
| "grad_norm": 0.6584722171298177, | |
| "learning_rate": 3.969128669058411e-05, | |
| "loss": 0.4626, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4553741216659546, | |
| "step": 565, | |
| "valid_targets_mean": 2019.5, | |
| "valid_targets_min": 484 | |
| }, | |
| { | |
| "epoch": 0.912, | |
| "grad_norm": 0.6229943668673381, | |
| "learning_rate": 3.9674781590468256e-05, | |
| "loss": 0.4442, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45545560121536255, | |
| "step": 570, | |
| "valid_targets_mean": 2519.6, | |
| "valid_targets_min": 841 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 0.6751176168853488, | |
| "learning_rate": 3.9657850302236184e-05, | |
| "loss": 0.4485, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.47964492440223694, | |
| "step": 575, | |
| "valid_targets_mean": 2350.7, | |
| "valid_targets_min": 1221 | |
| }, | |
| { | |
| "epoch": 0.928, | |
| "grad_norm": 0.6014793412024203, | |
| "learning_rate": 3.964049319264744e-05, | |
| "loss": 0.4446, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4613630473613739, | |
| "step": 580, | |
| "valid_targets_mean": 2549.6, | |
| "valid_targets_min": 812 | |
| }, | |
| { | |
| "epoch": 0.936, | |
| "grad_norm": 0.5807355939238428, | |
| "learning_rate": 3.962271063768555e-05, | |
| "loss": 0.4569, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4311615824699402, | |
| "step": 585, | |
| "valid_targets_mean": 2432.5, | |
| "valid_targets_min": 851 | |
| }, | |
| { | |
| "epoch": 0.944, | |
| "grad_norm": 0.6178326812634055, | |
| "learning_rate": 3.960450302254989e-05, | |
| "loss": 0.4474, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4509955048561096, | |
| "step": 590, | |
| "valid_targets_mean": 2490.1, | |
| "valid_targets_min": 746 | |
| }, | |
| { | |
| "epoch": 0.952, | |
| "grad_norm": 0.6233350959016624, | |
| "learning_rate": 3.958587074164735e-05, | |
| "loss": 0.4447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45208895206451416, | |
| "step": 595, | |
| "valid_targets_mean": 2428.0, | |
| "valid_targets_min": 998 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.7009493324631896, | |
| "learning_rate": 3.956681419858376e-05, | |
| "loss": 0.4571, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4502309560775757, | |
| "step": 600, | |
| "valid_targets_mean": 2555.1, | |
| "valid_targets_min": 987 | |
| }, | |
| { | |
| "epoch": 0.968, | |
| "grad_norm": 0.5923922989881036, | |
| "learning_rate": 3.954733380615516e-05, | |
| "loss": 0.4396, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4207536578178406, | |
| "step": 605, | |
| "valid_targets_mean": 2795.0, | |
| "valid_targets_min": 1191 | |
| }, | |
| { | |
| "epoch": 0.976, | |
| "grad_norm": 0.5779207349870441, | |
| "learning_rate": 3.95274299863389e-05, | |
| "loss": 0.4469, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4246981739997864, | |
| "step": 610, | |
| "valid_targets_mean": 2679.1, | |
| "valid_targets_min": 1744 | |
| }, | |
| { | |
| "epoch": 0.984, | |
| "grad_norm": 0.6198976743598128, | |
| "learning_rate": 3.950710317028443e-05, | |
| "loss": 0.4532, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4403854012489319, | |
| "step": 615, | |
| "valid_targets_mean": 2471.3, | |
| "valid_targets_min": 1215 | |
| }, | |
| { | |
| "epoch": 0.992, | |
| "grad_norm": 0.6121629508015668, | |
| "learning_rate": 3.9486353798303996e-05, | |
| "loss": 0.443, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4395740032196045, | |
| "step": 620, | |
| "valid_targets_mean": 2559.3, | |
| "valid_targets_min": 676 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.799149286424459, | |
| "learning_rate": 3.946518231986313e-05, | |
| "loss": 0.4748, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.49580055475234985, | |
| "step": 625, | |
| "valid_targets_mean": 2016.3, | |
| "valid_targets_min": 653 | |
| }, | |
| { | |
| "epoch": 1.008, | |
| "grad_norm": 0.5330351493981808, | |
| "learning_rate": 3.9443589193570847e-05, | |
| "loss": 0.4202, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39457374811172485, | |
| "step": 630, | |
| "valid_targets_mean": 2838.1, | |
| "valid_targets_min": 606 | |
| }, | |
| { | |
| "epoch": 1.016, | |
| "grad_norm": 0.6549967985881883, | |
| "learning_rate": 3.942157488716976e-05, | |
| "loss": 0.4273, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4309835433959961, | |
| "step": 635, | |
| "valid_targets_mean": 2344.7, | |
| "valid_targets_min": 841 | |
| }, | |
| { | |
| "epoch": 1.024, | |
| "grad_norm": 0.7153660835249178, | |
| "learning_rate": 3.939913987752595e-05, | |
| "loss": 0.4383, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.471935510635376, | |
| "step": 640, | |
| "valid_targets_mean": 2266.1, | |
| "valid_targets_min": 741 | |
| }, | |
| { | |
| "epoch": 1.032, | |
| "grad_norm": 0.8338207180913084, | |
| "learning_rate": 3.9376284650618605e-05, | |
| "loss": 0.4401, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4705868065357208, | |
| "step": 645, | |
| "valid_targets_mean": 2167.0, | |
| "valid_targets_min": 601 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 0.7086511513875436, | |
| "learning_rate": 3.935300970152952e-05, | |
| "loss": 0.4555, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4832397699356079, | |
| "step": 650, | |
| "valid_targets_mean": 2114.9, | |
| "valid_targets_min": 484 | |
| }, | |
| { | |
| "epoch": 1.048, | |
| "grad_norm": 0.6225217966467873, | |
| "learning_rate": 3.932931553443235e-05, | |
| "loss": 0.4612, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4231920838356018, | |
| "step": 655, | |
| "valid_targets_mean": 2642.4, | |
| "valid_targets_min": 1311 | |
| }, | |
| { | |
| "epoch": 1.056, | |
| "grad_norm": 0.5825267016910932, | |
| "learning_rate": 3.930520266258173e-05, | |
| "loss": 0.4561, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4089941084384918, | |
| "step": 660, | |
| "valid_targets_mean": 2996.6, | |
| "valid_targets_min": 620 | |
| }, | |
| { | |
| "epoch": 1.064, | |
| "grad_norm": 0.7200762284984045, | |
| "learning_rate": 3.928067160830208e-05, | |
| "loss": 0.4515, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.46363526582717896, | |
| "step": 665, | |
| "valid_targets_mean": 2121.4, | |
| "valid_targets_min": 530 | |
| }, | |
| { | |
| "epoch": 1.072, | |
| "grad_norm": 0.7015136785171452, | |
| "learning_rate": 3.925572290297638e-05, | |
| "loss": 0.4273, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4679427146911621, | |
| "step": 670, | |
| "valid_targets_mean": 2227.5, | |
| "valid_targets_min": 683 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 0.6123409957265331, | |
| "learning_rate": 3.9230357087034606e-05, | |
| "loss": 0.4504, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42846158146858215, | |
| "step": 675, | |
| "valid_targets_mean": 2303.3, | |
| "valid_targets_min": 370 | |
| }, | |
| { | |
| "epoch": 1.088, | |
| "grad_norm": 0.6261474208255726, | |
| "learning_rate": 3.9204574709942036e-05, | |
| "loss": 0.4302, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.462083637714386, | |
| "step": 680, | |
| "valid_targets_mean": 2561.6, | |
| "valid_targets_min": 1184 | |
| }, | |
| { | |
| "epoch": 1.096, | |
| "grad_norm": 0.6088483216638148, | |
| "learning_rate": 3.917837633018734e-05, | |
| "loss": 0.4435, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4106854200363159, | |
| "step": 685, | |
| "valid_targets_mean": 2380.8, | |
| "valid_targets_min": 548 | |
| }, | |
| { | |
| "epoch": 1.104, | |
| "grad_norm": 0.6534853519400231, | |
| "learning_rate": 3.915176251527051e-05, | |
| "loss": 0.4238, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4356333017349243, | |
| "step": 690, | |
| "valid_targets_mean": 2245.1, | |
| "valid_targets_min": 1293 | |
| }, | |
| { | |
| "epoch": 1.112, | |
| "grad_norm": 0.7245451888948117, | |
| "learning_rate": 3.912473384169051e-05, | |
| "loss": 0.4383, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4790792465209961, | |
| "step": 695, | |
| "valid_targets_mean": 1835.5, | |
| "valid_targets_min": 306 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "grad_norm": 0.7305794087802582, | |
| "learning_rate": 3.9097290894932866e-05, | |
| "loss": 0.4387, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44803181290626526, | |
| "step": 700, | |
| "valid_targets_mean": 1999.6, | |
| "valid_targets_min": 749 | |
| }, | |
| { | |
| "epoch": 1.1280000000000001, | |
| "grad_norm": 0.7068826644187056, | |
| "learning_rate": 3.906943426945691e-05, | |
| "loss": 0.4305, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4128977060317993, | |
| "step": 705, | |
| "valid_targets_mean": 2458.2, | |
| "valid_targets_min": 814 | |
| }, | |
| { | |
| "epoch": 1.1360000000000001, | |
| "grad_norm": 0.6339840636291904, | |
| "learning_rate": 3.9041164568682955e-05, | |
| "loss": 0.4257, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4198548495769501, | |
| "step": 710, | |
| "valid_targets_mean": 2165.4, | |
| "valid_targets_min": 682 | |
| }, | |
| { | |
| "epoch": 1.144, | |
| "grad_norm": 0.5528313866253091, | |
| "learning_rate": 3.90124824049792e-05, | |
| "loss": 0.4455, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.407492071390152, | |
| "step": 715, | |
| "valid_targets_mean": 2830.7, | |
| "valid_targets_min": 924 | |
| }, | |
| { | |
| "epoch": 1.152, | |
| "grad_norm": 0.6483857714237081, | |
| "learning_rate": 3.8983388399648465e-05, | |
| "loss": 0.4181, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42082953453063965, | |
| "step": 720, | |
| "valid_targets_mean": 2129.1, | |
| "valid_targets_min": 685 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "grad_norm": 0.7694201103576355, | |
| "learning_rate": 3.895388318291474e-05, | |
| "loss": 0.4276, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4231267273426056, | |
| "step": 725, | |
| "valid_targets_mean": 2458.7, | |
| "valid_targets_min": 670 | |
| }, | |
| { | |
| "epoch": 1.168, | |
| "grad_norm": 0.5506654953283203, | |
| "learning_rate": 3.892396739390952e-05, | |
| "loss": 0.4124, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42060285806655884, | |
| "step": 730, | |
| "valid_targets_mean": 2666.7, | |
| "valid_targets_min": 916 | |
| }, | |
| { | |
| "epoch": 1.176, | |
| "grad_norm": 0.6939198546037962, | |
| "learning_rate": 3.8893641680657986e-05, | |
| "loss": 0.4463, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40406715869903564, | |
| "step": 735, | |
| "valid_targets_mean": 2180.9, | |
| "valid_targets_min": 576 | |
| }, | |
| { | |
| "epoch": 1.184, | |
| "grad_norm": 0.638939912139877, | |
| "learning_rate": 3.886290670006495e-05, | |
| "loss": 0.4307, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42432937026023865, | |
| "step": 740, | |
| "valid_targets_mean": 2253.4, | |
| "valid_targets_min": 788 | |
| }, | |
| { | |
| "epoch": 1.192, | |
| "grad_norm": 0.6065560979177493, | |
| "learning_rate": 3.8831763117900605e-05, | |
| "loss": 0.4421, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42419037222862244, | |
| "step": 745, | |
| "valid_targets_mean": 2507.4, | |
| "valid_targets_min": 729 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 0.6276197118956589, | |
| "learning_rate": 3.8800211608786166e-05, | |
| "loss": 0.4354, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44459959864616394, | |
| "step": 750, | |
| "valid_targets_mean": 2263.6, | |
| "valid_targets_min": 607 | |
| }, | |
| { | |
| "epoch": 1.208, | |
| "grad_norm": 0.5928256043581343, | |
| "learning_rate": 3.876825285617918e-05, | |
| "loss": 0.4252, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3992008864879608, | |
| "step": 755, | |
| "valid_targets_mean": 2632.4, | |
| "valid_targets_min": 837 | |
| }, | |
| { | |
| "epoch": 1.216, | |
| "grad_norm": 0.6932083981151625, | |
| "learning_rate": 3.873588755235876e-05, | |
| "loss": 0.445, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4538978934288025, | |
| "step": 760, | |
| "valid_targets_mean": 2446.4, | |
| "valid_targets_min": 919 | |
| }, | |
| { | |
| "epoch": 1.224, | |
| "grad_norm": 0.6600533915715574, | |
| "learning_rate": 3.870311639841062e-05, | |
| "loss": 0.4379, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.46171343326568604, | |
| "step": 765, | |
| "valid_targets_mean": 2320.2, | |
| "valid_targets_min": 938 | |
| }, | |
| { | |
| "epoch": 1.232, | |
| "grad_norm": 0.6570908053641036, | |
| "learning_rate": 3.866994010421182e-05, | |
| "loss": 0.4341, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4576682448387146, | |
| "step": 770, | |
| "valid_targets_mean": 2393.4, | |
| "valid_targets_min": 1134 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 0.6076898436586241, | |
| "learning_rate": 3.863635938841545e-05, | |
| "loss": 0.4333, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42452096939086914, | |
| "step": 775, | |
| "valid_targets_mean": 2579.1, | |
| "valid_targets_min": 1369 | |
| }, | |
| { | |
| "epoch": 1.248, | |
| "grad_norm": 0.6534130436400201, | |
| "learning_rate": 3.8602374978435015e-05, | |
| "loss": 0.4144, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4091978669166565, | |
| "step": 780, | |
| "valid_targets_mean": 2470.7, | |
| "valid_targets_min": 662 | |
| }, | |
| { | |
| "epoch": 1.256, | |
| "grad_norm": 0.601406785106393, | |
| "learning_rate": 3.8567987610428705e-05, | |
| "loss": 0.4455, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41432830691337585, | |
| "step": 785, | |
| "valid_targets_mean": 2529.9, | |
| "valid_targets_min": 997 | |
| }, | |
| { | |
| "epoch": 1.264, | |
| "grad_norm": 0.6533424854411587, | |
| "learning_rate": 3.853319802928345e-05, | |
| "loss": 0.4466, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4692718982696533, | |
| "step": 790, | |
| "valid_targets_mean": 2336.9, | |
| "valid_targets_min": 973 | |
| }, | |
| { | |
| "epoch": 1.272, | |
| "grad_norm": 0.682487385408, | |
| "learning_rate": 3.849800698859877e-05, | |
| "loss": 0.4366, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45809996128082275, | |
| "step": 795, | |
| "valid_targets_mean": 2192.1, | |
| "valid_targets_min": 737 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 0.5916792104874027, | |
| "learning_rate": 3.846241525067047e-05, | |
| "loss": 0.4523, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44107410311698914, | |
| "step": 800, | |
| "valid_targets_mean": 2847.6, | |
| "valid_targets_min": 1315 | |
| }, | |
| { | |
| "epoch": 1.288, | |
| "grad_norm": 0.5829522122783333, | |
| "learning_rate": 3.842642358647411e-05, | |
| "loss": 0.4391, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4088653028011322, | |
| "step": 805, | |
| "valid_targets_mean": 2645.9, | |
| "valid_targets_min": 875 | |
| }, | |
| { | |
| "epoch": 1.296, | |
| "grad_norm": 0.5499814298234279, | |
| "learning_rate": 3.839003277564831e-05, | |
| "loss": 0.418, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4087795615196228, | |
| "step": 810, | |
| "valid_targets_mean": 2580.8, | |
| "valid_targets_min": 1486 | |
| }, | |
| { | |
| "epoch": 1.304, | |
| "grad_norm": 0.6574479760890423, | |
| "learning_rate": 3.835324360647785e-05, | |
| "loss": 0.4347, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4606819748878479, | |
| "step": 815, | |
| "valid_targets_mean": 2330.2, | |
| "valid_targets_min": 817 | |
| }, | |
| { | |
| "epoch": 1.312, | |
| "grad_norm": 0.601654460513071, | |
| "learning_rate": 3.831605687587663e-05, | |
| "loss": 0.4312, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4386676251888275, | |
| "step": 820, | |
| "valid_targets_mean": 2486.2, | |
| "valid_targets_min": 923 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 0.6595379480500122, | |
| "learning_rate": 3.827847338937037e-05, | |
| "loss": 0.4383, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.47224780917167664, | |
| "step": 825, | |
| "valid_targets_mean": 2481.6, | |
| "valid_targets_min": 1308 | |
| }, | |
| { | |
| "epoch": 1.328, | |
| "grad_norm": 0.6420505342297114, | |
| "learning_rate": 3.824049396107918e-05, | |
| "loss": 0.4555, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4457557797431946, | |
| "step": 830, | |
| "valid_targets_mean": 2432.2, | |
| "valid_targets_min": 1208 | |
| }, | |
| { | |
| "epoch": 1.336, | |
| "grad_norm": 0.6171185957266434, | |
| "learning_rate": 3.8202119413699914e-05, | |
| "loss": 0.4501, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4661561846733093, | |
| "step": 835, | |
| "valid_targets_mean": 2544.8, | |
| "valid_targets_min": 1655 | |
| }, | |
| { | |
| "epoch": 1.3439999999999999, | |
| "grad_norm": 0.7465821312386511, | |
| "learning_rate": 3.8163350578488366e-05, | |
| "loss": 0.4313, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4433724284172058, | |
| "step": 840, | |
| "valid_targets_mean": 2051.7, | |
| "valid_targets_min": 692 | |
| }, | |
| { | |
| "epoch": 1.3519999999999999, | |
| "grad_norm": 0.6169111289469572, | |
| "learning_rate": 3.812418829524124e-05, | |
| "loss": 0.4294, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.412350058555603, | |
| "step": 845, | |
| "valid_targets_mean": 2435.8, | |
| "valid_targets_min": 589 | |
| }, | |
| { | |
| "epoch": 1.3599999999999999, | |
| "grad_norm": 0.7532082786283157, | |
| "learning_rate": 3.8084633412277974e-05, | |
| "loss": 0.463, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4547457695007324, | |
| "step": 850, | |
| "valid_targets_mean": 1884.3, | |
| "valid_targets_min": 713 | |
| }, | |
| { | |
| "epoch": 1.3679999999999999, | |
| "grad_norm": 0.6398375548597498, | |
| "learning_rate": 3.804468678642238e-05, | |
| "loss": 0.4541, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42184194922447205, | |
| "step": 855, | |
| "valid_targets_mean": 2393.6, | |
| "valid_targets_min": 615 | |
| }, | |
| { | |
| "epoch": 1.376, | |
| "grad_norm": 0.631628642707112, | |
| "learning_rate": 3.800434928298403e-05, | |
| "loss": 0.4376, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41081473231315613, | |
| "step": 860, | |
| "valid_targets_mean": 2298.9, | |
| "valid_targets_min": 662 | |
| }, | |
| { | |
| "epoch": 1.384, | |
| "grad_norm": 0.6933767743328803, | |
| "learning_rate": 3.796362177573957e-05, | |
| "loss": 0.4417, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4457859694957733, | |
| "step": 865, | |
| "valid_targets_mean": 2026.8, | |
| "valid_targets_min": 720 | |
| }, | |
| { | |
| "epoch": 1.392, | |
| "grad_norm": 1.369681479343786, | |
| "learning_rate": 3.792250514691378e-05, | |
| "loss": 0.438, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3672015368938446, | |
| "step": 870, | |
| "valid_targets_mean": 3045.9, | |
| "valid_targets_min": 1121 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 0.6162913143906541, | |
| "learning_rate": 3.788100028716043e-05, | |
| "loss": 0.4476, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4049740731716156, | |
| "step": 875, | |
| "valid_targets_mean": 2585.2, | |
| "valid_targets_min": 676 | |
| }, | |
| { | |
| "epoch": 1.408, | |
| "grad_norm": 0.6079961919327072, | |
| "learning_rate": 3.7839108095543016e-05, | |
| "loss": 0.4309, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.405215322971344, | |
| "step": 880, | |
| "valid_targets_mean": 2377.9, | |
| "valid_targets_min": 889 | |
| }, | |
| { | |
| "epoch": 1.416, | |
| "grad_norm": 0.6785807936076146, | |
| "learning_rate": 3.7796829479515295e-05, | |
| "loss": 0.4266, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4144185185432434, | |
| "step": 885, | |
| "valid_targets_mean": 2151.0, | |
| "valid_targets_min": 642 | |
| }, | |
| { | |
| "epoch": 1.424, | |
| "grad_norm": 0.6131872618040544, | |
| "learning_rate": 3.775416535490159e-05, | |
| "loss": 0.4558, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4448813199996948, | |
| "step": 890, | |
| "valid_targets_mean": 2378.1, | |
| "valid_targets_min": 654 | |
| }, | |
| { | |
| "epoch": 1.432, | |
| "grad_norm": 0.6613538155685743, | |
| "learning_rate": 3.7711116645876984e-05, | |
| "loss": 0.4357, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4319063127040863, | |
| "step": 895, | |
| "valid_targets_mean": 2121.9, | |
| "valid_targets_min": 585 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "grad_norm": 0.7462290977697131, | |
| "learning_rate": 3.7667684284947286e-05, | |
| "loss": 0.4436, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4267106056213379, | |
| "step": 900, | |
| "valid_targets_mean": 2381.2, | |
| "valid_targets_min": 507 | |
| }, | |
| { | |
| "epoch": 1.448, | |
| "grad_norm": 0.6275859403485409, | |
| "learning_rate": 3.762386921292885e-05, | |
| "loss": 0.454, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45304590463638306, | |
| "step": 905, | |
| "valid_targets_mean": 2642.2, | |
| "valid_targets_min": 978 | |
| }, | |
| { | |
| "epoch": 1.456, | |
| "grad_norm": 0.5666889148480784, | |
| "learning_rate": 3.757967237892818e-05, | |
| "loss": 0.4302, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38631516695022583, | |
| "step": 910, | |
| "valid_targets_mean": 2592.1, | |
| "valid_targets_min": 590 | |
| }, | |
| { | |
| "epoch": 1.464, | |
| "grad_norm": 0.5662953110489375, | |
| "learning_rate": 3.7535094740321334e-05, | |
| "loss": 0.447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4322415888309479, | |
| "step": 915, | |
| "valid_targets_mean": 2557.4, | |
| "valid_targets_min": 689 | |
| }, | |
| { | |
| "epoch": 1.472, | |
| "grad_norm": 0.5882496676672685, | |
| "learning_rate": 3.749013726273328e-05, | |
| "loss": 0.4283, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4422641694545746, | |
| "step": 920, | |
| "valid_targets_mean": 2509.5, | |
| "valid_targets_min": 1141 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "grad_norm": 0.5776506126678617, | |
| "learning_rate": 3.7444800920016875e-05, | |
| "loss": 0.4186, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4118436276912689, | |
| "step": 925, | |
| "valid_targets_mean": 2366.6, | |
| "valid_targets_min": 597 | |
| }, | |
| { | |
| "epoch": 1.488, | |
| "grad_norm": 0.6271293382346671, | |
| "learning_rate": 3.7399086694231864e-05, | |
| "loss": 0.4488, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4351826608181, | |
| "step": 930, | |
| "valid_targets_mean": 2374.8, | |
| "valid_targets_min": 1080 | |
| }, | |
| { | |
| "epoch": 1.496, | |
| "grad_norm": 0.7241735711030381, | |
| "learning_rate": 3.735299557562352e-05, | |
| "loss": 0.4349, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4319533407688141, | |
| "step": 935, | |
| "valid_targets_mean": 2160.5, | |
| "valid_targets_min": 852 | |
| }, | |
| { | |
| "epoch": 1.504, | |
| "grad_norm": 0.7100669252442159, | |
| "learning_rate": 3.7306528562601245e-05, | |
| "loss": 0.4398, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45116013288497925, | |
| "step": 940, | |
| "valid_targets_mean": 2070.9, | |
| "valid_targets_min": 764 | |
| }, | |
| { | |
| "epoch": 1.512, | |
| "grad_norm": 0.7200292914345058, | |
| "learning_rate": 3.7259686661716945e-05, | |
| "loss": 0.4475, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.49465733766555786, | |
| "step": 945, | |
| "valid_targets_mean": 2056.4, | |
| "valid_targets_min": 753 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 0.6763700134990425, | |
| "learning_rate": 3.7212470887643204e-05, | |
| "loss": 0.4372, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4461861252784729, | |
| "step": 950, | |
| "valid_targets_mean": 2200.1, | |
| "valid_targets_min": 1060 | |
| }, | |
| { | |
| "epoch": 1.528, | |
| "grad_norm": 0.6213853943162089, | |
| "learning_rate": 3.7164882263151315e-05, | |
| "loss": 0.4394, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4571177065372467, | |
| "step": 955, | |
| "valid_targets_mean": 2428.8, | |
| "valid_targets_min": 930 | |
| }, | |
| { | |
| "epoch": 1.536, | |
| "grad_norm": 0.6604591254411275, | |
| "learning_rate": 3.711692181908913e-05, | |
| "loss": 0.4312, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44394445419311523, | |
| "step": 960, | |
| "valid_targets_mean": 2190.9, | |
| "valid_targets_min": 756 | |
| }, | |
| { | |
| "epoch": 1.544, | |
| "grad_norm": 0.621120610099597, | |
| "learning_rate": 3.706859059435871e-05, | |
| "loss": 0.4395, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4528801441192627, | |
| "step": 965, | |
| "valid_targets_mean": 2357.9, | |
| "valid_targets_min": 714 | |
| }, | |
| { | |
| "epoch": 1.552, | |
| "grad_norm": 0.6479086626249172, | |
| "learning_rate": 3.701988963589384e-05, | |
| "loss": 0.4319, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4565218687057495, | |
| "step": 970, | |
| "valid_targets_mean": 2357.4, | |
| "valid_targets_min": 678 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 0.5929729443171671, | |
| "learning_rate": 3.697081999863736e-05, | |
| "loss": 0.4426, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41704872250556946, | |
| "step": 975, | |
| "valid_targets_mean": 2477.4, | |
| "valid_targets_min": 658 | |
| }, | |
| { | |
| "epoch": 1.568, | |
| "grad_norm": 0.7034370523967252, | |
| "learning_rate": 3.692138274551828e-05, | |
| "loss": 0.4317, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4655498266220093, | |
| "step": 980, | |
| "valid_targets_mean": 2352.0, | |
| "valid_targets_min": 1173 | |
| }, | |
| { | |
| "epoch": 1.576, | |
| "grad_norm": 0.6817636602005042, | |
| "learning_rate": 3.687157894742878e-05, | |
| "loss": 0.4379, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4376213848590851, | |
| "step": 985, | |
| "valid_targets_mean": 2075.2, | |
| "valid_targets_min": 604 | |
| }, | |
| { | |
| "epoch": 1.584, | |
| "grad_norm": 0.6485914336603663, | |
| "learning_rate": 3.682140968320101e-05, | |
| "loss": 0.4332, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4256216585636139, | |
| "step": 990, | |
| "valid_targets_mean": 2463.4, | |
| "valid_targets_min": 1065 | |
| }, | |
| { | |
| "epoch": 1.592, | |
| "grad_norm": 0.5841306362302757, | |
| "learning_rate": 3.6770876039583725e-05, | |
| "loss": 0.4545, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4446376860141754, | |
| "step": 995, | |
| "valid_targets_mean": 2528.8, | |
| "valid_targets_min": 761 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 0.6434175836703774, | |
| "learning_rate": 3.671997911121871e-05, | |
| "loss": 0.4571, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44120827317237854, | |
| "step": 1000, | |
| "valid_targets_mean": 2088.9, | |
| "valid_targets_min": 686 | |
| }, | |
| { | |
| "epoch": 1.608, | |
| "grad_norm": 0.6288341619537156, | |
| "learning_rate": 3.6668720000617126e-05, | |
| "loss": 0.4208, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4059444069862366, | |
| "step": 1005, | |
| "valid_targets_mean": 2276.5, | |
| "valid_targets_min": 786 | |
| }, | |
| { | |
| "epoch": 1.616, | |
| "grad_norm": 0.5844705403803708, | |
| "learning_rate": 3.661709981813558e-05, | |
| "loss": 0.4345, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4230886399745941, | |
| "step": 1010, | |
| "valid_targets_mean": 2555.6, | |
| "valid_targets_min": 809 | |
| }, | |
| { | |
| "epoch": 1.624, | |
| "grad_norm": 0.6244939134987266, | |
| "learning_rate": 3.6565119681952086e-05, | |
| "loss": 0.4553, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4502819776535034, | |
| "step": 1015, | |
| "valid_targets_mean": 2212.4, | |
| "valid_targets_min": 442 | |
| }, | |
| { | |
| "epoch": 1.6320000000000001, | |
| "grad_norm": 0.6173544056476045, | |
| "learning_rate": 3.651278071804186e-05, | |
| "loss": 0.4232, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4541641175746918, | |
| "step": 1020, | |
| "valid_targets_mean": 2676.3, | |
| "valid_targets_min": 2111 | |
| }, | |
| { | |
| "epoch": 1.6400000000000001, | |
| "grad_norm": 0.6326476054076543, | |
| "learning_rate": 3.646008406015291e-05, | |
| "loss": 0.4424, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42796561121940613, | |
| "step": 1025, | |
| "valid_targets_mean": 2486.1, | |
| "valid_targets_min": 967 | |
| }, | |
| { | |
| "epoch": 1.6480000000000001, | |
| "grad_norm": 0.6216395857761358, | |
| "learning_rate": 3.6407030849781475e-05, | |
| "loss": 0.433, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44164803624153137, | |
| "step": 1030, | |
| "valid_targets_mean": 2380.2, | |
| "valid_targets_min": 883 | |
| }, | |
| { | |
| "epoch": 1.6560000000000001, | |
| "grad_norm": 0.5707144839810783, | |
| "learning_rate": 3.635362223614733e-05, | |
| "loss": 0.4237, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.402190625667572, | |
| "step": 1035, | |
| "valid_targets_mean": 2646.9, | |
| "valid_targets_min": 875 | |
| }, | |
| { | |
| "epoch": 1.6640000000000001, | |
| "grad_norm": 0.6326893009965505, | |
| "learning_rate": 3.629985937616884e-05, | |
| "loss": 0.4299, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4385002553462982, | |
| "step": 1040, | |
| "valid_targets_mean": 2627.1, | |
| "valid_targets_min": 657 | |
| }, | |
| { | |
| "epoch": 1.6720000000000002, | |
| "grad_norm": 0.6175015547121943, | |
| "learning_rate": 3.624574343443794e-05, | |
| "loss": 0.4271, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4224476218223572, | |
| "step": 1045, | |
| "valid_targets_mean": 2436.2, | |
| "valid_targets_min": 776 | |
| }, | |
| { | |
| "epoch": 1.6800000000000002, | |
| "grad_norm": 0.6235914122987322, | |
| "learning_rate": 3.619127558319492e-05, | |
| "loss": 0.4613, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.46333080530166626, | |
| "step": 1050, | |
| "valid_targets_mean": 2319.1, | |
| "valid_targets_min": 1166 | |
| }, | |
| { | |
| "epoch": 1.688, | |
| "grad_norm": 0.6265949309393127, | |
| "learning_rate": 3.613645700230298e-05, | |
| "loss": 0.4432, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.46041712164878845, | |
| "step": 1055, | |
| "valid_targets_mean": 2153.2, | |
| "valid_targets_min": 659 | |
| }, | |
| { | |
| "epoch": 1.696, | |
| "grad_norm": 0.5655935170182097, | |
| "learning_rate": 3.6081288879222696e-05, | |
| "loss": 0.44, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40793120861053467, | |
| "step": 1060, | |
| "valid_targets_mean": 2697.2, | |
| "valid_targets_min": 612 | |
| }, | |
| { | |
| "epoch": 1.704, | |
| "grad_norm": 0.6618385447827254, | |
| "learning_rate": 3.602577240898633e-05, | |
| "loss": 0.4336, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4535277485847473, | |
| "step": 1065, | |
| "valid_targets_mean": 2301.0, | |
| "valid_targets_min": 387 | |
| }, | |
| { | |
| "epoch": 1.712, | |
| "grad_norm": 0.5771516778336349, | |
| "learning_rate": 3.596990879417188e-05, | |
| "loss": 0.4261, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40155816078186035, | |
| "step": 1070, | |
| "valid_targets_mean": 2554.8, | |
| "valid_targets_min": 612 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "grad_norm": 0.6882606659694092, | |
| "learning_rate": 3.591369924487711e-05, | |
| "loss": 0.4251, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4629209041595459, | |
| "step": 1075, | |
| "valid_targets_mean": 1866.8, | |
| "valid_targets_min": 558 | |
| }, | |
| { | |
| "epoch": 1.728, | |
| "grad_norm": 0.6766607679488811, | |
| "learning_rate": 3.585714497869326e-05, | |
| "loss": 0.4374, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45873624086380005, | |
| "step": 1080, | |
| "valid_targets_mean": 2035.2, | |
| "valid_targets_min": 580 | |
| }, | |
| { | |
| "epoch": 1.736, | |
| "grad_norm": 0.589073541077514, | |
| "learning_rate": 3.580024722067872e-05, | |
| "loss": 0.4226, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4332297444343567, | |
| "step": 1085, | |
| "valid_targets_mean": 2441.6, | |
| "valid_targets_min": 604 | |
| }, | |
| { | |
| "epoch": 1.744, | |
| "grad_norm": 0.6024297435143611, | |
| "learning_rate": 3.574300720333247e-05, | |
| "loss": 0.4235, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44380128383636475, | |
| "step": 1090, | |
| "valid_targets_mean": 2427.1, | |
| "valid_targets_min": 944 | |
| }, | |
| { | |
| "epoch": 1.752, | |
| "grad_norm": 0.5863852076388184, | |
| "learning_rate": 3.568542616656739e-05, | |
| "loss": 0.4374, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4471275210380554, | |
| "step": 1095, | |
| "valid_targets_mean": 2633.6, | |
| "valid_targets_min": 1157 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 0.5884843210677144, | |
| "learning_rate": 3.5627505357683404e-05, | |
| "loss": 0.4324, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42342448234558105, | |
| "step": 1100, | |
| "valid_targets_mean": 2645.6, | |
| "valid_targets_min": 627 | |
| }, | |
| { | |
| "epoch": 1.768, | |
| "grad_norm": 0.6645389429055254, | |
| "learning_rate": 3.5569246031340474e-05, | |
| "loss": 0.439, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4746902585029602, | |
| "step": 1105, | |
| "valid_targets_mean": 2341.7, | |
| "valid_targets_min": 628 | |
| }, | |
| { | |
| "epoch": 1.776, | |
| "grad_norm": 0.5966469996066948, | |
| "learning_rate": 3.5510649449531375e-05, | |
| "loss": 0.4271, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42743945121765137, | |
| "step": 1110, | |
| "valid_targets_mean": 2537.6, | |
| "valid_targets_min": 643 | |
| }, | |
| { | |
| "epoch": 1.784, | |
| "grad_norm": 0.5878248615166197, | |
| "learning_rate": 3.545171688155441e-05, | |
| "loss": 0.4213, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.430372029542923, | |
| "step": 1115, | |
| "valid_targets_mean": 2542.7, | |
| "valid_targets_min": 1068 | |
| }, | |
| { | |
| "epoch": 1.792, | |
| "grad_norm": 0.6343590604430001, | |
| "learning_rate": 3.5392449603985894e-05, | |
| "loss": 0.4437, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43726491928100586, | |
| "step": 1120, | |
| "valid_targets_mean": 2325.7, | |
| "valid_targets_min": 676 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 0.536815346738192, | |
| "learning_rate": 3.53328489006525e-05, | |
| "loss": 0.4202, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3983931243419647, | |
| "step": 1125, | |
| "valid_targets_mean": 2698.4, | |
| "valid_targets_min": 1589 | |
| }, | |
| { | |
| "epoch": 1.808, | |
| "grad_norm": 0.5844130028956325, | |
| "learning_rate": 3.527291606260345e-05, | |
| "loss": 0.4146, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38655996322631836, | |
| "step": 1130, | |
| "valid_targets_mean": 2262.7, | |
| "valid_targets_min": 926 | |
| }, | |
| { | |
| "epoch": 1.8159999999999998, | |
| "grad_norm": 0.6433258701700891, | |
| "learning_rate": 3.521265238808255e-05, | |
| "loss": 0.4425, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42823395133018494, | |
| "step": 1135, | |
| "valid_targets_mean": 1968.8, | |
| "valid_targets_min": 1033 | |
| }, | |
| { | |
| "epoch": 1.8239999999999998, | |
| "grad_norm": 0.582115811790589, | |
| "learning_rate": 3.515205918250007e-05, | |
| "loss": 0.4405, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43498510122299194, | |
| "step": 1140, | |
| "valid_targets_mean": 2542.1, | |
| "valid_targets_min": 702 | |
| }, | |
| { | |
| "epoch": 1.8319999999999999, | |
| "grad_norm": 0.6326850159151952, | |
| "learning_rate": 3.5091137758404456e-05, | |
| "loss": 0.4476, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44848906993865967, | |
| "step": 1145, | |
| "valid_targets_mean": 2340.6, | |
| "valid_targets_min": 945 | |
| }, | |
| { | |
| "epoch": 1.8399999999999999, | |
| "grad_norm": 1.5302422103990057, | |
| "learning_rate": 3.5029889435453924e-05, | |
| "loss": 0.4198, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45762234926223755, | |
| "step": 1150, | |
| "valid_targets_mean": 2221.9, | |
| "valid_targets_min": 595 | |
| }, | |
| { | |
| "epoch": 1.8479999999999999, | |
| "grad_norm": 0.4970864914525463, | |
| "learning_rate": 3.496831554038784e-05, | |
| "loss": 0.4112, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3754098117351532, | |
| "step": 1155, | |
| "valid_targets_mean": 3324.8, | |
| "valid_targets_min": 626 | |
| }, | |
| { | |
| "epoch": 1.8559999999999999, | |
| "grad_norm": 0.66895938733825, | |
| "learning_rate": 3.490641740699801e-05, | |
| "loss": 0.4285, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4742193818092346, | |
| "step": 1160, | |
| "valid_targets_mean": 2048.9, | |
| "valid_targets_min": 619 | |
| }, | |
| { | |
| "epoch": 1.8639999999999999, | |
| "grad_norm": 0.5433939301259606, | |
| "learning_rate": 3.484419637609977e-05, | |
| "loss": 0.4266, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40021824836730957, | |
| "step": 1165, | |
| "valid_targets_mean": 2779.8, | |
| "valid_targets_min": 1797 | |
| }, | |
| { | |
| "epoch": 1.8719999999999999, | |
| "grad_norm": 1.0415130550227225, | |
| "learning_rate": 3.478165379550292e-05, | |
| "loss": 0.4339, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4287850260734558, | |
| "step": 1170, | |
| "valid_targets_mean": 2352.6, | |
| "valid_targets_min": 833 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 0.5923170984142792, | |
| "learning_rate": 3.471879101998262e-05, | |
| "loss": 0.4601, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44315701723098755, | |
| "step": 1175, | |
| "valid_targets_mean": 2611.9, | |
| "valid_targets_min": 1523 | |
| }, | |
| { | |
| "epoch": 1.888, | |
| "grad_norm": 0.6483765666231087, | |
| "learning_rate": 3.465560941124992e-05, | |
| "loss": 0.4403, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.484798789024353, | |
| "step": 1180, | |
| "valid_targets_mean": 2279.2, | |
| "valid_targets_min": 738 | |
| }, | |
| { | |
| "epoch": 1.896, | |
| "grad_norm": 0.5219942435392709, | |
| "learning_rate": 3.459211033792233e-05, | |
| "loss": 0.431, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41061511635780334, | |
| "step": 1185, | |
| "valid_targets_mean": 2872.6, | |
| "valid_targets_min": 730 | |
| }, | |
| { | |
| "epoch": 1.904, | |
| "grad_norm": 0.6395729031124877, | |
| "learning_rate": 3.4528295175494194e-05, | |
| "loss": 0.4394, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4299981892108917, | |
| "step": 1190, | |
| "valid_targets_mean": 2462.9, | |
| "valid_targets_min": 796 | |
| }, | |
| { | |
| "epoch": 1.912, | |
| "grad_norm": 0.5643734986587181, | |
| "learning_rate": 3.4464165306306845e-05, | |
| "loss": 0.435, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.400689959526062, | |
| "step": 1195, | |
| "valid_targets_mean": 2524.4, | |
| "valid_targets_min": 574 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 0.7243790486340214, | |
| "learning_rate": 3.4399722119518675e-05, | |
| "loss": 0.4532, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4652594029903412, | |
| "step": 1200, | |
| "valid_targets_mean": 2036.8, | |
| "valid_targets_min": 848 | |
| }, | |
| { | |
| "epoch": 1.928, | |
| "grad_norm": 0.5478422029867802, | |
| "learning_rate": 3.433496701107506e-05, | |
| "loss": 0.4267, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3938635587692261, | |
| "step": 1205, | |
| "valid_targets_mean": 2800.8, | |
| "valid_targets_min": 1483 | |
| }, | |
| { | |
| "epoch": 1.936, | |
| "grad_norm": 0.6541759373256487, | |
| "learning_rate": 3.426990138367813e-05, | |
| "loss": 0.4301, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45855993032455444, | |
| "step": 1210, | |
| "valid_targets_mean": 2101.4, | |
| "valid_targets_min": 870 | |
| }, | |
| { | |
| "epoch": 1.944, | |
| "grad_norm": 0.6136604495164271, | |
| "learning_rate": 3.420452664675633e-05, | |
| "loss": 0.4336, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4466904103755951, | |
| "step": 1215, | |
| "valid_targets_mean": 2156.8, | |
| "valid_targets_min": 624 | |
| }, | |
| { | |
| "epoch": 1.952, | |
| "grad_norm": 0.6311990378635277, | |
| "learning_rate": 3.4138844216433946e-05, | |
| "loss": 0.4371, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4521297812461853, | |
| "step": 1220, | |
| "valid_targets_mean": 2365.5, | |
| "valid_targets_min": 1199 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 0.5991130836480716, | |
| "learning_rate": 3.407285551550041e-05, | |
| "loss": 0.4384, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44246920943260193, | |
| "step": 1225, | |
| "valid_targets_mean": 2326.0, | |
| "valid_targets_min": 1000 | |
| }, | |
| { | |
| "epoch": 1.968, | |
| "grad_norm": 0.672967674746338, | |
| "learning_rate": 3.4006561973379466e-05, | |
| "loss": 0.4581, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4707268476486206, | |
| "step": 1230, | |
| "valid_targets_mean": 2091.6, | |
| "valid_targets_min": 631 | |
| }, | |
| { | |
| "epoch": 1.976, | |
| "grad_norm": 0.5791823883862799, | |
| "learning_rate": 3.3939965026098245e-05, | |
| "loss": 0.4197, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4041367173194885, | |
| "step": 1235, | |
| "valid_targets_mean": 2632.2, | |
| "valid_targets_min": 894 | |
| }, | |
| { | |
| "epoch": 1.984, | |
| "grad_norm": 0.5504582884768956, | |
| "learning_rate": 3.38730661162561e-05, | |
| "loss": 0.432, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38332805037498474, | |
| "step": 1240, | |
| "valid_targets_mean": 2613.8, | |
| "valid_targets_min": 1539 | |
| }, | |
| { | |
| "epoch": 1.992, | |
| "grad_norm": 0.6019398759021497, | |
| "learning_rate": 3.3805866692993414e-05, | |
| "loss": 0.441, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42343538999557495, | |
| "step": 1245, | |
| "valid_targets_mean": 2393.1, | |
| "valid_targets_min": 832 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.6142721800773309, | |
| "learning_rate": 3.373836821196018e-05, | |
| "loss": 0.431, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4213658273220062, | |
| "step": 1250, | |
| "valid_targets_mean": 2225.6, | |
| "valid_targets_min": 809 | |
| }, | |
| { | |
| "epoch": 2.008, | |
| "grad_norm": 0.566984260838742, | |
| "learning_rate": 3.3670572135284456e-05, | |
| "loss": 0.4041, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3673563599586487, | |
| "step": 1255, | |
| "valid_targets_mean": 2545.2, | |
| "valid_targets_min": 1030 | |
| }, | |
| { | |
| "epoch": 2.016, | |
| "grad_norm": 0.6282588632444733, | |
| "learning_rate": 3.360247993154073e-05, | |
| "loss": 0.4094, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39245718717575073, | |
| "step": 1260, | |
| "valid_targets_mean": 2090.2, | |
| "valid_targets_min": 774 | |
| }, | |
| { | |
| "epoch": 2.024, | |
| "grad_norm": 0.68740299633641, | |
| "learning_rate": 3.35340930757181e-05, | |
| "loss": 0.4212, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3981652557849884, | |
| "step": 1265, | |
| "valid_targets_mean": 2120.9, | |
| "valid_targets_min": 762 | |
| }, | |
| { | |
| "epoch": 2.032, | |
| "grad_norm": 0.5831769575223239, | |
| "learning_rate": 3.3465413049188276e-05, | |
| "loss": 0.4032, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3730853199958801, | |
| "step": 1270, | |
| "valid_targets_mean": 2617.3, | |
| "valid_targets_min": 1533 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "grad_norm": 0.6249850110520228, | |
| "learning_rate": 3.3396441339673564e-05, | |
| "loss": 0.4142, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4278792142868042, | |
| "step": 1275, | |
| "valid_targets_mean": 2506.9, | |
| "valid_targets_min": 673 | |
| }, | |
| { | |
| "epoch": 2.048, | |
| "grad_norm": 0.6088510037795549, | |
| "learning_rate": 3.3327179441214574e-05, | |
| "loss": 0.405, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42335671186447144, | |
| "step": 1280, | |
| "valid_targets_mean": 2289.5, | |
| "valid_targets_min": 1072 | |
| }, | |
| { | |
| "epoch": 2.056, | |
| "grad_norm": 0.5728322801056469, | |
| "learning_rate": 3.325762885413791e-05, | |
| "loss": 0.4085, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4096108078956604, | |
| "step": 1285, | |
| "valid_targets_mean": 2627.4, | |
| "valid_targets_min": 1331 | |
| }, | |
| { | |
| "epoch": 2.064, | |
| "grad_norm": 0.6842908305749249, | |
| "learning_rate": 3.318779108502362e-05, | |
| "loss": 0.4148, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43703195452690125, | |
| "step": 1290, | |
| "valid_targets_mean": 2156.1, | |
| "valid_targets_min": 786 | |
| }, | |
| { | |
| "epoch": 2.072, | |
| "grad_norm": 0.6447442725403991, | |
| "learning_rate": 3.3117667646672616e-05, | |
| "loss": 0.3979, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3940478265285492, | |
| "step": 1295, | |
| "valid_targets_mean": 2325.4, | |
| "valid_targets_min": 740 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "grad_norm": 0.6428951801040104, | |
| "learning_rate": 3.304726005807386e-05, | |
| "loss": 0.3826, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40893763303756714, | |
| "step": 1300, | |
| "valid_targets_mean": 2247.2, | |
| "valid_targets_min": 688 | |
| }, | |
| { | |
| "epoch": 2.088, | |
| "grad_norm": 0.6135025402919587, | |
| "learning_rate": 3.297656984437148e-05, | |
| "loss": 0.4101, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3879171311855316, | |
| "step": 1305, | |
| "valid_targets_mean": 2372.0, | |
| "valid_targets_min": 639 | |
| }, | |
| { | |
| "epoch": 2.096, | |
| "grad_norm": 0.6652936744353024, | |
| "learning_rate": 3.2905598536831715e-05, | |
| "loss": 0.4239, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44061246514320374, | |
| "step": 1310, | |
| "valid_targets_mean": 2154.2, | |
| "valid_targets_min": 743 | |
| }, | |
| { | |
| "epoch": 2.104, | |
| "grad_norm": 0.6523375950116365, | |
| "learning_rate": 3.2834347672809776e-05, | |
| "loss": 0.3928, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41073179244995117, | |
| "step": 1315, | |
| "valid_targets_mean": 2811.1, | |
| "valid_targets_min": 1831 | |
| }, | |
| { | |
| "epoch": 2.112, | |
| "grad_norm": 0.6077762062428691, | |
| "learning_rate": 3.276281879571651e-05, | |
| "loss": 0.4217, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39779356122016907, | |
| "step": 1320, | |
| "valid_targets_mean": 2443.5, | |
| "valid_targets_min": 855 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "grad_norm": 0.5793040239747499, | |
| "learning_rate": 3.2691013454985006e-05, | |
| "loss": 0.4044, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3803567886352539, | |
| "step": 1325, | |
| "valid_targets_mean": 2550.6, | |
| "valid_targets_min": 786 | |
| }, | |
| { | |
| "epoch": 2.128, | |
| "grad_norm": 0.6680476265501086, | |
| "learning_rate": 3.2618933206036994e-05, | |
| "loss": 0.3901, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4052245020866394, | |
| "step": 1330, | |
| "valid_targets_mean": 2341.2, | |
| "valid_targets_min": 776 | |
| }, | |
| { | |
| "epoch": 2.136, | |
| "grad_norm": 0.6492406169897352, | |
| "learning_rate": 3.2546579610249177e-05, | |
| "loss": 0.4064, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42024415731430054, | |
| "step": 1335, | |
| "valid_targets_mean": 2130.4, | |
| "valid_targets_min": 633 | |
| }, | |
| { | |
| "epoch": 2.144, | |
| "grad_norm": 0.6204846916763979, | |
| "learning_rate": 3.2473954234919386e-05, | |
| "loss": 0.4137, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41287142038345337, | |
| "step": 1340, | |
| "valid_targets_mean": 2284.4, | |
| "valid_targets_min": 628 | |
| }, | |
| { | |
| "epoch": 2.152, | |
| "grad_norm": 0.6233759211732591, | |
| "learning_rate": 3.240105865323266e-05, | |
| "loss": 0.4232, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4033377170562744, | |
| "step": 1345, | |
| "valid_targets_mean": 2149.9, | |
| "valid_targets_min": 522 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "grad_norm": 0.5895554499438427, | |
| "learning_rate": 3.232789444422714e-05, | |
| "loss": 0.3885, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3827977776527405, | |
| "step": 1350, | |
| "valid_targets_mean": 2620.0, | |
| "valid_targets_min": 1318 | |
| }, | |
| { | |
| "epoch": 2.168, | |
| "grad_norm": 0.5777062788179502, | |
| "learning_rate": 3.225446319275988e-05, | |
| "loss": 0.4128, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.383849561214447, | |
| "step": 1355, | |
| "valid_targets_mean": 2806.8, | |
| "valid_targets_min": 1167 | |
| }, | |
| { | |
| "epoch": 2.176, | |
| "grad_norm": 0.6442434899214516, | |
| "learning_rate": 3.218076648947251e-05, | |
| "loss": 0.4079, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4065432548522949, | |
| "step": 1360, | |
| "valid_targets_mean": 2231.4, | |
| "valid_targets_min": 1305 | |
| }, | |
| { | |
| "epoch": 2.184, | |
| "grad_norm": 0.6501192972774998, | |
| "learning_rate": 3.2106805930756804e-05, | |
| "loss": 0.4055, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42025482654571533, | |
| "step": 1365, | |
| "valid_targets_mean": 2148.1, | |
| "valid_targets_min": 670 | |
| }, | |
| { | |
| "epoch": 2.192, | |
| "grad_norm": 0.6526241135703132, | |
| "learning_rate": 3.2032583118720045e-05, | |
| "loss": 0.4223, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42404836416244507, | |
| "step": 1370, | |
| "valid_targets_mean": 2050.2, | |
| "valid_targets_min": 627 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "grad_norm": 0.6417831053568079, | |
| "learning_rate": 3.195809966115038e-05, | |
| "loss": 0.4099, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41074275970458984, | |
| "step": 1375, | |
| "valid_targets_mean": 2165.0, | |
| "valid_targets_min": 670 | |
| }, | |
| { | |
| "epoch": 2.208, | |
| "grad_norm": 0.6858826218890438, | |
| "learning_rate": 3.188335717148195e-05, | |
| "loss": 0.4048, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44892221689224243, | |
| "step": 1380, | |
| "valid_targets_mean": 2091.4, | |
| "valid_targets_min": 417 | |
| }, | |
| { | |
| "epoch": 2.216, | |
| "grad_norm": 0.5964347328155009, | |
| "learning_rate": 3.1808357268759964e-05, | |
| "loss": 0.4004, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40602582693099976, | |
| "step": 1385, | |
| "valid_targets_mean": 2580.9, | |
| "valid_targets_min": 777 | |
| }, | |
| { | |
| "epoch": 2.224, | |
| "grad_norm": 0.6055478522763069, | |
| "learning_rate": 3.173310157760563e-05, | |
| "loss": 0.4104, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4049045145511627, | |
| "step": 1390, | |
| "valid_targets_mean": 2326.7, | |
| "valid_targets_min": 618 | |
| }, | |
| { | |
| "epoch": 2.232, | |
| "grad_norm": 0.5682560854626032, | |
| "learning_rate": 3.165759172818093e-05, | |
| "loss": 0.3807, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38516765832901, | |
| "step": 1395, | |
| "valid_targets_mean": 2537.3, | |
| "valid_targets_min": 989 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "grad_norm": 0.6299417028086615, | |
| "learning_rate": 3.158182935615336e-05, | |
| "loss": 0.4026, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40374094247817993, | |
| "step": 1400, | |
| "valid_targets_mean": 2275.2, | |
| "valid_targets_min": 637 | |
| }, | |
| { | |
| "epoch": 2.248, | |
| "grad_norm": 0.6135628464170894, | |
| "learning_rate": 3.150581610266046e-05, | |
| "loss": 0.4128, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40207067131996155, | |
| "step": 1405, | |
| "valid_targets_mean": 2404.6, | |
| "valid_targets_min": 471 | |
| }, | |
| { | |
| "epoch": 2.2560000000000002, | |
| "grad_norm": 0.5163121688960695, | |
| "learning_rate": 3.1429553614274256e-05, | |
| "loss": 0.3811, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34493958950042725, | |
| "step": 1410, | |
| "valid_targets_mean": 2794.7, | |
| "valid_targets_min": 1072 | |
| }, | |
| { | |
| "epoch": 2.2640000000000002, | |
| "grad_norm": 0.5797314117990449, | |
| "learning_rate": 3.1353043542965636e-05, | |
| "loss": 0.395, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3778391480445862, | |
| "step": 1415, | |
| "valid_targets_mean": 2505.6, | |
| "valid_targets_min": 1141 | |
| }, | |
| { | |
| "epoch": 2.2720000000000002, | |
| "grad_norm": 0.6351152541067143, | |
| "learning_rate": 3.1276287546068536e-05, | |
| "loss": 0.4089, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4025972783565521, | |
| "step": 1420, | |
| "valid_targets_mean": 2322.2, | |
| "valid_targets_min": 762 | |
| }, | |
| { | |
| "epoch": 2.2800000000000002, | |
| "grad_norm": 0.6347905422148297, | |
| "learning_rate": 3.1199287286244047e-05, | |
| "loss": 0.3766, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42649704217910767, | |
| "step": 1425, | |
| "valid_targets_mean": 2317.1, | |
| "valid_targets_min": 747 | |
| }, | |
| { | |
| "epoch": 2.288, | |
| "grad_norm": 0.6944841579796738, | |
| "learning_rate": 3.112204443144438e-05, | |
| "loss": 0.4135, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44431525468826294, | |
| "step": 1430, | |
| "valid_targets_mean": 2030.6, | |
| "valid_targets_min": 747 | |
| }, | |
| { | |
| "epoch": 2.296, | |
| "grad_norm": 0.5977702075080438, | |
| "learning_rate": 3.1044560654876775e-05, | |
| "loss": 0.4128, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4044709801673889, | |
| "step": 1435, | |
| "valid_targets_mean": 2327.9, | |
| "valid_targets_min": 689 | |
| }, | |
| { | |
| "epoch": 2.304, | |
| "grad_norm": 0.6377348502580388, | |
| "learning_rate": 3.0966837634967215e-05, | |
| "loss": 0.4011, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42452406883239746, | |
| "step": 1440, | |
| "valid_targets_mean": 2312.2, | |
| "valid_targets_min": 643 | |
| }, | |
| { | |
| "epoch": 2.312, | |
| "grad_norm": 0.543483855236602, | |
| "learning_rate": 3.088887705532409e-05, | |
| "loss": 0.4199, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3883250951766968, | |
| "step": 1445, | |
| "valid_targets_mean": 2985.1, | |
| "valid_targets_min": 1064 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "grad_norm": 0.6436537174269752, | |
| "learning_rate": 3.081068060470174e-05, | |
| "loss": 0.4015, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.399660587310791, | |
| "step": 1450, | |
| "valid_targets_mean": 2157.8, | |
| "valid_targets_min": 635 | |
| }, | |
| { | |
| "epoch": 2.328, | |
| "grad_norm": 0.6554604539338741, | |
| "learning_rate": 3.073224997696385e-05, | |
| "loss": 0.4073, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42677703499794006, | |
| "step": 1455, | |
| "valid_targets_mean": 2295.5, | |
| "valid_targets_min": 718 | |
| }, | |
| { | |
| "epoch": 2.336, | |
| "grad_norm": 0.6275787273732197, | |
| "learning_rate": 3.065358687104675e-05, | |
| "loss": 0.3884, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4116145372390747, | |
| "step": 1460, | |
| "valid_targets_mean": 2349.5, | |
| "valid_targets_min": 670 | |
| }, | |
| { | |
| "epoch": 2.344, | |
| "grad_norm": 0.7507823236318536, | |
| "learning_rate": 3.057469299092264e-05, | |
| "loss": 0.4285, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4097451865673065, | |
| "step": 1465, | |
| "valid_targets_mean": 2530.2, | |
| "valid_targets_min": 741 | |
| }, | |
| { | |
| "epoch": 2.352, | |
| "grad_norm": 0.5594297562132423, | |
| "learning_rate": 3.0495570045562686e-05, | |
| "loss": 0.4003, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37826666235923767, | |
| "step": 1470, | |
| "valid_targets_mean": 2806.1, | |
| "valid_targets_min": 1147 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "grad_norm": 0.5374966128720836, | |
| "learning_rate": 3.041621974889996e-05, | |
| "loss": 0.3869, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36974743008613586, | |
| "step": 1475, | |
| "valid_targets_mean": 2765.7, | |
| "valid_targets_min": 943 | |
| }, | |
| { | |
| "epoch": 2.368, | |
| "grad_norm": 0.7071950629766613, | |
| "learning_rate": 3.0336643819792342e-05, | |
| "loss": 0.3986, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44906729459762573, | |
| "step": 1480, | |
| "valid_targets_mean": 1984.7, | |
| "valid_targets_min": 712 | |
| }, | |
| { | |
| "epoch": 2.376, | |
| "grad_norm": 0.6274520592565033, | |
| "learning_rate": 3.0256843981985295e-05, | |
| "loss": 0.4204, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45014089345932007, | |
| "step": 1485, | |
| "valid_targets_mean": 2480.5, | |
| "valid_targets_min": 1369 | |
| }, | |
| { | |
| "epoch": 2.384, | |
| "grad_norm": 0.6682182125144448, | |
| "learning_rate": 3.0176821964074503e-05, | |
| "loss": 0.4211, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4411892294883728, | |
| "step": 1490, | |
| "valid_targets_mean": 2145.0, | |
| "valid_targets_min": 627 | |
| }, | |
| { | |
| "epoch": 2.392, | |
| "grad_norm": 0.631848940072248, | |
| "learning_rate": 3.009657949946844e-05, | |
| "loss": 0.4099, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4174479842185974, | |
| "step": 1495, | |
| "valid_targets_mean": 2216.5, | |
| "valid_targets_min": 595 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 0.586918339875622, | |
| "learning_rate": 3.00161183263508e-05, | |
| "loss": 0.4064, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38466623425483704, | |
| "step": 1500, | |
| "valid_targets_mean": 2567.8, | |
| "valid_targets_min": 1635 | |
| }, | |
| { | |
| "epoch": 2.408, | |
| "grad_norm": 0.5723168490058183, | |
| "learning_rate": 2.993544018764289e-05, | |
| "loss": 0.4067, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3870210647583008, | |
| "step": 1505, | |
| "valid_targets_mean": 2610.5, | |
| "valid_targets_min": 619 | |
| }, | |
| { | |
| "epoch": 2.416, | |
| "grad_norm": 0.5238472137602374, | |
| "learning_rate": 2.9854546830965833e-05, | |
| "loss": 0.4087, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3635292649269104, | |
| "step": 1510, | |
| "valid_targets_mean": 2825.7, | |
| "valid_targets_min": 1135 | |
| }, | |
| { | |
| "epoch": 2.424, | |
| "grad_norm": 0.6061179858895165, | |
| "learning_rate": 2.9773440008602736e-05, | |
| "loss": 0.417, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4219356179237366, | |
| "step": 1515, | |
| "valid_targets_mean": 2323.0, | |
| "valid_targets_min": 567 | |
| }, | |
| { | |
| "epoch": 2.432, | |
| "grad_norm": 0.6339556380542279, | |
| "learning_rate": 2.96921214774607e-05, | |
| "loss": 0.3996, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4146285057067871, | |
| "step": 1520, | |
| "valid_targets_mean": 2218.8, | |
| "valid_targets_min": 712 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "grad_norm": 0.6064884477109023, | |
| "learning_rate": 2.9610592999032815e-05, | |
| "loss": 0.3976, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4029238224029541, | |
| "step": 1525, | |
| "valid_targets_mean": 2360.1, | |
| "valid_targets_min": 676 | |
| }, | |
| { | |
| "epoch": 2.448, | |
| "grad_norm": 0.6089595263918436, | |
| "learning_rate": 2.9528856339359973e-05, | |
| "loss": 0.4143, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42686522006988525, | |
| "step": 1530, | |
| "valid_targets_mean": 2517.9, | |
| "valid_targets_min": 747 | |
| }, | |
| { | |
| "epoch": 2.456, | |
| "grad_norm": 0.653020004015492, | |
| "learning_rate": 2.9446913268992588e-05, | |
| "loss": 0.4114, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4561653733253479, | |
| "step": 1535, | |
| "valid_targets_mean": 2295.3, | |
| "valid_targets_min": 1319 | |
| }, | |
| { | |
| "epoch": 2.464, | |
| "grad_norm": 0.6764309057796736, | |
| "learning_rate": 2.936476556295229e-05, | |
| "loss": 0.4172, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4331625998020172, | |
| "step": 1540, | |
| "valid_targets_mean": 2071.8, | |
| "valid_targets_min": 594 | |
| }, | |
| { | |
| "epoch": 2.472, | |
| "grad_norm": 0.6765172223358372, | |
| "learning_rate": 2.928241500069346e-05, | |
| "loss": 0.4172, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42376580834388733, | |
| "step": 1545, | |
| "valid_targets_mean": 2154.9, | |
| "valid_targets_min": 726 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "grad_norm": 0.6206273355585628, | |
| "learning_rate": 2.9199863366064655e-05, | |
| "loss": 0.4059, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41974562406539917, | |
| "step": 1550, | |
| "valid_targets_mean": 2765.5, | |
| "valid_targets_min": 948 | |
| }, | |
| { | |
| "epoch": 2.488, | |
| "grad_norm": 0.7048376150455586, | |
| "learning_rate": 2.9117112447270007e-05, | |
| "loss": 0.4254, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4536668658256531, | |
| "step": 1555, | |
| "valid_targets_mean": 1862.8, | |
| "valid_targets_min": 624 | |
| }, | |
| { | |
| "epoch": 2.496, | |
| "grad_norm": 0.6011963836992713, | |
| "learning_rate": 2.9034164036830462e-05, | |
| "loss": 0.423, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3865543007850647, | |
| "step": 1560, | |
| "valid_targets_mean": 2565.4, | |
| "valid_targets_min": 567 | |
| }, | |
| { | |
| "epoch": 2.504, | |
| "grad_norm": 0.6152960192868094, | |
| "learning_rate": 2.8951019931544975e-05, | |
| "loss": 0.4229, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42501020431518555, | |
| "step": 1565, | |
| "valid_targets_mean": 2785.4, | |
| "valid_targets_min": 1467 | |
| }, | |
| { | |
| "epoch": 2.512, | |
| "grad_norm": 0.6151356387539295, | |
| "learning_rate": 2.8867681932451544e-05, | |
| "loss": 0.3862, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42337119579315186, | |
| "step": 1570, | |
| "valid_targets_mean": 2417.8, | |
| "valid_targets_min": 1060 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "grad_norm": 0.5911162679240172, | |
| "learning_rate": 2.8784151844788267e-05, | |
| "loss": 0.394, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38688942790031433, | |
| "step": 1575, | |
| "valid_targets_mean": 2411.8, | |
| "valid_targets_min": 597 | |
| }, | |
| { | |
| "epoch": 2.528, | |
| "grad_norm": 0.6284051957446064, | |
| "learning_rate": 2.8700431477954155e-05, | |
| "loss": 0.4184, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42191997170448303, | |
| "step": 1580, | |
| "valid_targets_mean": 2048.2, | |
| "valid_targets_min": 681 | |
| }, | |
| { | |
| "epoch": 2.536, | |
| "grad_norm": 0.7208603354315725, | |
| "learning_rate": 2.8616522645470012e-05, | |
| "loss": 0.4018, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4383850395679474, | |
| "step": 1585, | |
| "valid_targets_mean": 1752.1, | |
| "valid_targets_min": 570 | |
| }, | |
| { | |
| "epoch": 2.544, | |
| "grad_norm": 0.6112554844178338, | |
| "learning_rate": 2.8532427164939086e-05, | |
| "loss": 0.4121, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3988059163093567, | |
| "step": 1590, | |
| "valid_targets_mean": 2160.6, | |
| "valid_targets_min": 824 | |
| }, | |
| { | |
| "epoch": 2.552, | |
| "grad_norm": 0.5885746331220164, | |
| "learning_rate": 2.844814685800776e-05, | |
| "loss": 0.3962, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41117456555366516, | |
| "step": 1595, | |
| "valid_targets_mean": 2444.8, | |
| "valid_targets_min": 895 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "grad_norm": 0.6859235265208016, | |
| "learning_rate": 2.8363683550326028e-05, | |
| "loss": 0.4086, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42529648542404175, | |
| "step": 1600, | |
| "valid_targets_mean": 2296.4, | |
| "valid_targets_min": 511 | |
| }, | |
| { | |
| "epoch": 2.568, | |
| "grad_norm": 0.5933961450763576, | |
| "learning_rate": 2.8279039071508024e-05, | |
| "loss": 0.4134, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41135159134864807, | |
| "step": 1605, | |
| "valid_targets_mean": 2716.2, | |
| "valid_targets_min": 1675 | |
| }, | |
| { | |
| "epoch": 2.576, | |
| "grad_norm": 0.5973688684206384, | |
| "learning_rate": 2.81942152550923e-05, | |
| "loss": 0.4097, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38977688550949097, | |
| "step": 1610, | |
| "valid_targets_mean": 2420.4, | |
| "valid_targets_min": 732 | |
| }, | |
| { | |
| "epoch": 2.584, | |
| "grad_norm": 0.6640208566686371, | |
| "learning_rate": 2.810921393850219e-05, | |
| "loss": 0.4195, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4426608979701996, | |
| "step": 1615, | |
| "valid_targets_mean": 2348.2, | |
| "valid_targets_min": 1157 | |
| }, | |
| { | |
| "epoch": 2.592, | |
| "grad_norm": 0.664573850812217, | |
| "learning_rate": 2.802403696300595e-05, | |
| "loss": 0.4022, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42512786388397217, | |
| "step": 1620, | |
| "valid_targets_mean": 1862.1, | |
| "valid_targets_min": 891 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 0.572539200427506, | |
| "learning_rate": 2.7938686173676915e-05, | |
| "loss": 0.4109, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39695966243743896, | |
| "step": 1625, | |
| "valid_targets_mean": 2581.4, | |
| "valid_targets_min": 872 | |
| }, | |
| { | |
| "epoch": 2.608, | |
| "grad_norm": 0.6499708727510073, | |
| "learning_rate": 2.7853163419353505e-05, | |
| "loss": 0.4196, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4214821457862854, | |
| "step": 1630, | |
| "valid_targets_mean": 2067.1, | |
| "valid_targets_min": 668 | |
| }, | |
| { | |
| "epoch": 2.616, | |
| "grad_norm": 0.6694649398545164, | |
| "learning_rate": 2.776747055259918e-05, | |
| "loss": 0.4264, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42351481318473816, | |
| "step": 1635, | |
| "valid_targets_mean": 2090.0, | |
| "valid_targets_min": 743 | |
| }, | |
| { | |
| "epoch": 2.624, | |
| "grad_norm": 0.6020293288838131, | |
| "learning_rate": 2.768160942966233e-05, | |
| "loss": 0.4113, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4066402018070221, | |
| "step": 1640, | |
| "valid_targets_mean": 2337.1, | |
| "valid_targets_min": 950 | |
| }, | |
| { | |
| "epoch": 2.632, | |
| "grad_norm": 0.6312642272158415, | |
| "learning_rate": 2.759558191043603e-05, | |
| "loss": 0.412, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41122761368751526, | |
| "step": 1645, | |
| "valid_targets_mean": 2409.6, | |
| "valid_targets_min": 634 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "grad_norm": 0.5658199590495661, | |
| "learning_rate": 2.7509389858417783e-05, | |
| "loss": 0.3912, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3818676173686981, | |
| "step": 1650, | |
| "valid_targets_mean": 2471.4, | |
| "valid_targets_min": 852 | |
| }, | |
| { | |
| "epoch": 2.648, | |
| "grad_norm": 0.6723251564057431, | |
| "learning_rate": 2.7423035140669147e-05, | |
| "loss": 0.4001, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3987196981906891, | |
| "step": 1655, | |
| "valid_targets_mean": 2053.6, | |
| "valid_targets_min": 560 | |
| }, | |
| { | |
| "epoch": 2.656, | |
| "grad_norm": 0.54755572109968, | |
| "learning_rate": 2.7336519627775288e-05, | |
| "loss": 0.4021, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38828593492507935, | |
| "step": 1660, | |
| "valid_targets_mean": 2746.6, | |
| "valid_targets_min": 779 | |
| }, | |
| { | |
| "epoch": 2.664, | |
| "grad_norm": 0.5448530396320009, | |
| "learning_rate": 2.724984519380444e-05, | |
| "loss": 0.4073, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3946529030799866, | |
| "step": 1665, | |
| "valid_targets_mean": 2920.2, | |
| "valid_targets_min": 1401 | |
| }, | |
| { | |
| "epoch": 2.672, | |
| "grad_norm": 0.703153417370805, | |
| "learning_rate": 2.7163013716267353e-05, | |
| "loss": 0.4169, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42749476432800293, | |
| "step": 1670, | |
| "valid_targets_mean": 2015.0, | |
| "valid_targets_min": 894 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "grad_norm": 0.6358114835875868, | |
| "learning_rate": 2.707602707607659e-05, | |
| "loss": 0.4087, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4138975739479065, | |
| "step": 1675, | |
| "valid_targets_mean": 2263.9, | |
| "valid_targets_min": 829 | |
| }, | |
| { | |
| "epoch": 2.6879999999999997, | |
| "grad_norm": 0.5591433391102716, | |
| "learning_rate": 2.6988887157505786e-05, | |
| "loss": 0.4208, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39621496200561523, | |
| "step": 1680, | |
| "valid_targets_mean": 2720.2, | |
| "valid_targets_min": 1269 | |
| }, | |
| { | |
| "epoch": 2.6959999999999997, | |
| "grad_norm": 0.6060961673596665, | |
| "learning_rate": 2.6901595848148842e-05, | |
| "loss": 0.3999, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4340288043022156, | |
| "step": 1685, | |
| "valid_targets_mean": 2491.8, | |
| "valid_targets_min": 755 | |
| }, | |
| { | |
| "epoch": 2.7039999999999997, | |
| "grad_norm": 0.5969591273634783, | |
| "learning_rate": 2.681415503887904e-05, | |
| "loss": 0.4123, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40641626715660095, | |
| "step": 1690, | |
| "valid_targets_mean": 2428.6, | |
| "valid_targets_min": 754 | |
| }, | |
| { | |
| "epoch": 2.7119999999999997, | |
| "grad_norm": 0.5988970263104237, | |
| "learning_rate": 2.672656662380805e-05, | |
| "loss": 0.3947, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4131797254085541, | |
| "step": 1695, | |
| "valid_targets_mean": 2530.7, | |
| "valid_targets_min": 703 | |
| }, | |
| { | |
| "epoch": 2.7199999999999998, | |
| "grad_norm": 0.580739934515122, | |
| "learning_rate": 2.6638832500244967e-05, | |
| "loss": 0.4172, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40965735912323, | |
| "step": 1700, | |
| "valid_targets_mean": 2454.7, | |
| "valid_targets_min": 731 | |
| }, | |
| { | |
| "epoch": 2.7279999999999998, | |
| "grad_norm": 0.6069252169341895, | |
| "learning_rate": 2.655095456865514e-05, | |
| "loss": 0.4041, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41324254870414734, | |
| "step": 1705, | |
| "valid_targets_mean": 2458.7, | |
| "valid_targets_min": 1246 | |
| }, | |
| { | |
| "epoch": 2.7359999999999998, | |
| "grad_norm": 0.5956036832667821, | |
| "learning_rate": 2.6462934732619047e-05, | |
| "loss": 0.4, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40611159801483154, | |
| "step": 1710, | |
| "valid_targets_mean": 2482.6, | |
| "valid_targets_min": 1273 | |
| }, | |
| { | |
| "epoch": 2.7439999999999998, | |
| "grad_norm": 0.6184514343402461, | |
| "learning_rate": 2.6374774898791047e-05, | |
| "loss": 0.4061, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3789522349834442, | |
| "step": 1715, | |
| "valid_targets_mean": 2289.4, | |
| "valid_targets_min": 689 | |
| }, | |
| { | |
| "epoch": 2.752, | |
| "grad_norm": 0.6112887143892226, | |
| "learning_rate": 2.6286476976858084e-05, | |
| "loss": 0.4117, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4043063521385193, | |
| "step": 1720, | |
| "valid_targets_mean": 2493.1, | |
| "valid_targets_min": 779 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "grad_norm": 0.6124760782362094, | |
| "learning_rate": 2.619804287949831e-05, | |
| "loss": 0.4078, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4235222041606903, | |
| "step": 1725, | |
| "valid_targets_mean": 2521.1, | |
| "valid_targets_min": 693 | |
| }, | |
| { | |
| "epoch": 2.768, | |
| "grad_norm": 0.5227704496822333, | |
| "learning_rate": 2.6109474522339676e-05, | |
| "loss": 0.3775, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3500251770019531, | |
| "step": 1730, | |
| "valid_targets_mean": 2562.7, | |
| "valid_targets_min": 745 | |
| }, | |
| { | |
| "epoch": 2.776, | |
| "grad_norm": 0.5969991715645667, | |
| "learning_rate": 2.6020773823918414e-05, | |
| "loss": 0.4032, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37266242504119873, | |
| "step": 1735, | |
| "valid_targets_mean": 2158.2, | |
| "valid_targets_min": 575 | |
| }, | |
| { | |
| "epoch": 2.784, | |
| "grad_norm": 0.6267891785393893, | |
| "learning_rate": 2.5931942705637473e-05, | |
| "loss": 0.4021, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4024673402309418, | |
| "step": 1740, | |
| "valid_targets_mean": 2321.1, | |
| "valid_targets_min": 650 | |
| }, | |
| { | |
| "epoch": 2.792, | |
| "grad_norm": 0.6044184430625194, | |
| "learning_rate": 2.5842983091724923e-05, | |
| "loss": 0.4185, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42339223623275757, | |
| "step": 1745, | |
| "valid_targets_mean": 2422.4, | |
| "valid_targets_min": 933 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 0.5784685545960938, | |
| "learning_rate": 2.575389690919226e-05, | |
| "loss": 0.408, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42380669713020325, | |
| "step": 1750, | |
| "valid_targets_mean": 2760.5, | |
| "valid_targets_min": 1768 | |
| }, | |
| { | |
| "epoch": 2.808, | |
| "grad_norm": 0.6316326299220579, | |
| "learning_rate": 2.5664686087792658e-05, | |
| "loss": 0.3924, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40810805559158325, | |
| "step": 1755, | |
| "valid_targets_mean": 2373.4, | |
| "valid_targets_min": 653 | |
| }, | |
| { | |
| "epoch": 2.816, | |
| "grad_norm": 0.6781625675287202, | |
| "learning_rate": 2.5575352559979188e-05, | |
| "loss": 0.4057, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3918643593788147, | |
| "step": 1760, | |
| "valid_targets_mean": 2486.4, | |
| "valid_targets_min": 604 | |
| }, | |
| { | |
| "epoch": 2.824, | |
| "grad_norm": 0.6149082105532682, | |
| "learning_rate": 2.5485898260862936e-05, | |
| "loss": 0.4044, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41537925601005554, | |
| "step": 1765, | |
| "valid_targets_mean": 2468.2, | |
| "valid_targets_min": 639 | |
| }, | |
| { | |
| "epoch": 2.832, | |
| "grad_norm": 0.6370806548762461, | |
| "learning_rate": 2.5396325128171072e-05, | |
| "loss": 0.4147, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39408478140830994, | |
| "step": 1770, | |
| "valid_targets_mean": 2442.0, | |
| "valid_targets_min": 608 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "grad_norm": 0.5719076536940244, | |
| "learning_rate": 2.5306635102204942e-05, | |
| "loss": 0.4089, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.414425253868103, | |
| "step": 1775, | |
| "valid_targets_mean": 2648.8, | |
| "valid_targets_min": 1276 | |
| }, | |
| { | |
| "epoch": 2.848, | |
| "grad_norm": 0.619047617996578, | |
| "learning_rate": 2.5216830125797943e-05, | |
| "loss": 0.3797, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36515292525291443, | |
| "step": 1780, | |
| "valid_targets_mean": 2069.6, | |
| "valid_targets_min": 615 | |
| }, | |
| { | |
| "epoch": 2.856, | |
| "grad_norm": 0.6064877336124406, | |
| "learning_rate": 2.5126912144273517e-05, | |
| "loss": 0.4104, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4058312773704529, | |
| "step": 1785, | |
| "valid_targets_mean": 2320.6, | |
| "valid_targets_min": 471 | |
| }, | |
| { | |
| "epoch": 2.864, | |
| "grad_norm": 0.670022634304181, | |
| "learning_rate": 2.5036883105402985e-05, | |
| "loss": 0.4091, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40476682782173157, | |
| "step": 1790, | |
| "valid_targets_mean": 1988.9, | |
| "valid_targets_min": 631 | |
| }, | |
| { | |
| "epoch": 2.872, | |
| "grad_norm": 0.6044593976096089, | |
| "learning_rate": 2.4946744959363343e-05, | |
| "loss": 0.3981, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38838961720466614, | |
| "step": 1795, | |
| "valid_targets_mean": 2484.6, | |
| "valid_targets_min": 726 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "grad_norm": 0.614236708206963, | |
| "learning_rate": 2.4856499658695018e-05, | |
| "loss": 0.4101, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4145464301109314, | |
| "step": 1800, | |
| "valid_targets_mean": 2283.9, | |
| "valid_targets_min": 812 | |
| }, | |
| { | |
| "epoch": 2.888, | |
| "grad_norm": 0.8773740496005488, | |
| "learning_rate": 2.4766149158259603e-05, | |
| "loss": 0.3976, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3907817304134369, | |
| "step": 1805, | |
| "valid_targets_mean": 2194.6, | |
| "valid_targets_min": 508 | |
| }, | |
| { | |
| "epoch": 2.896, | |
| "grad_norm": 0.6018336839491192, | |
| "learning_rate": 2.4675695415197476e-05, | |
| "loss": 0.396, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4207257330417633, | |
| "step": 1810, | |
| "valid_targets_mean": 2481.9, | |
| "valid_targets_min": 735 | |
| }, | |
| { | |
| "epoch": 2.904, | |
| "grad_norm": 0.5685142459911705, | |
| "learning_rate": 2.458514038888543e-05, | |
| "loss": 0.4141, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40741056203842163, | |
| "step": 1815, | |
| "valid_targets_mean": 2612.0, | |
| "valid_targets_min": 887 | |
| }, | |
| { | |
| "epoch": 2.912, | |
| "grad_norm": 0.6983489922705232, | |
| "learning_rate": 2.4494486040894208e-05, | |
| "loss": 0.4072, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44186872243881226, | |
| "step": 1820, | |
| "valid_targets_mean": 2102.1, | |
| "valid_targets_min": 1287 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "grad_norm": 0.6147911556976061, | |
| "learning_rate": 2.440373433494603e-05, | |
| "loss": 0.4041, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45045363903045654, | |
| "step": 1825, | |
| "valid_targets_mean": 2486.1, | |
| "valid_targets_min": 523 | |
| }, | |
| { | |
| "epoch": 2.928, | |
| "grad_norm": 0.6271247387935147, | |
| "learning_rate": 2.4312887236872066e-05, | |
| "loss": 0.4171, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4281001091003418, | |
| "step": 1830, | |
| "valid_targets_mean": 2274.2, | |
| "valid_targets_min": 1405 | |
| }, | |
| { | |
| "epoch": 2.936, | |
| "grad_norm": 0.6532889026699363, | |
| "learning_rate": 2.4221946714569803e-05, | |
| "loss": 0.4191, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4529232084751129, | |
| "step": 1835, | |
| "valid_targets_mean": 2092.2, | |
| "valid_targets_min": 777 | |
| }, | |
| { | |
| "epoch": 2.944, | |
| "grad_norm": 0.6348842178072138, | |
| "learning_rate": 2.4130914737960472e-05, | |
| "loss": 0.4113, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3983311653137207, | |
| "step": 1840, | |
| "valid_targets_mean": 2224.6, | |
| "valid_targets_min": 755 | |
| }, | |
| { | |
| "epoch": 2.952, | |
| "grad_norm": 0.4937114027035953, | |
| "learning_rate": 2.4039793278946358e-05, | |
| "loss": 0.3708, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34252679347991943, | |
| "step": 1845, | |
| "valid_targets_mean": 2861.1, | |
| "valid_targets_min": 824 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "grad_norm": 0.623341775639461, | |
| "learning_rate": 2.394858431136806e-05, | |
| "loss": 0.4127, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4113304018974304, | |
| "step": 1850, | |
| "valid_targets_mean": 2284.8, | |
| "valid_targets_min": 645 | |
| }, | |
| { | |
| "epoch": 2.968, | |
| "grad_norm": 0.5683584704356505, | |
| "learning_rate": 2.385728981096178e-05, | |
| "loss": 0.403, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3914598822593689, | |
| "step": 1855, | |
| "valid_targets_mean": 2621.4, | |
| "valid_targets_min": 843 | |
| }, | |
| { | |
| "epoch": 2.976, | |
| "grad_norm": 0.5802616502103262, | |
| "learning_rate": 2.3765911755316503e-05, | |
| "loss": 0.4092, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3963446617126465, | |
| "step": 1860, | |
| "valid_targets_mean": 2564.2, | |
| "valid_targets_min": 617 | |
| }, | |
| { | |
| "epoch": 2.984, | |
| "grad_norm": 0.6163067326469784, | |
| "learning_rate": 2.3674452123831125e-05, | |
| "loss": 0.3884, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4104539155960083, | |
| "step": 1865, | |
| "valid_targets_mean": 2320.2, | |
| "valid_targets_min": 1112 | |
| }, | |
| { | |
| "epoch": 2.992, | |
| "grad_norm": 0.5391263340107644, | |
| "learning_rate": 2.358291289767165e-05, | |
| "loss": 0.4064, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3693543076515198, | |
| "step": 1870, | |
| "valid_targets_mean": 2771.8, | |
| "valid_targets_min": 1090 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.6545761052990515, | |
| "learning_rate": 2.3491296059728202e-05, | |
| "loss": 0.411, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4166029393672943, | |
| "step": 1875, | |
| "valid_targets_mean": 2221.6, | |
| "valid_targets_min": 662 | |
| }, | |
| { | |
| "epoch": 3.008, | |
| "grad_norm": 0.5881439927848966, | |
| "learning_rate": 2.339960359457212e-05, | |
| "loss": 0.372, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37697720527648926, | |
| "step": 1880, | |
| "valid_targets_mean": 2365.9, | |
| "valid_targets_min": 945 | |
| }, | |
| { | |
| "epoch": 3.016, | |
| "grad_norm": 0.6063398287124605, | |
| "learning_rate": 2.3307837488412955e-05, | |
| "loss": 0.3772, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3653172254562378, | |
| "step": 1885, | |
| "valid_targets_mean": 2340.2, | |
| "valid_targets_min": 977 | |
| }, | |
| { | |
| "epoch": 3.024, | |
| "grad_norm": 0.5968531392776985, | |
| "learning_rate": 2.3215999729055437e-05, | |
| "loss": 0.3783, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36922895908355713, | |
| "step": 1890, | |
| "valid_targets_mean": 2414.5, | |
| "valid_targets_min": 590 | |
| }, | |
| { | |
| "epoch": 3.032, | |
| "grad_norm": 0.6375543970755811, | |
| "learning_rate": 2.312409230585641e-05, | |
| "loss": 0.376, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38601815700531006, | |
| "step": 1895, | |
| "valid_targets_mean": 2255.2, | |
| "valid_targets_min": 1107 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "grad_norm": 0.6060375860752494, | |
| "learning_rate": 2.3032117209681782e-05, | |
| "loss": 0.3874, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3774688243865967, | |
| "step": 1900, | |
| "valid_targets_mean": 2389.7, | |
| "valid_targets_min": 824 | |
| }, | |
| { | |
| "epoch": 3.048, | |
| "grad_norm": 0.5654895133707849, | |
| "learning_rate": 2.2940076432863335e-05, | |
| "loss": 0.3711, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36035019159317017, | |
| "step": 1905, | |
| "valid_targets_mean": 2650.9, | |
| "valid_targets_min": 779 | |
| }, | |
| { | |
| "epoch": 3.056, | |
| "grad_norm": 0.59516779658439, | |
| "learning_rate": 2.2847971969155626e-05, | |
| "loss": 0.397, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3954877257347107, | |
| "step": 1910, | |
| "valid_targets_mean": 2668.8, | |
| "valid_targets_min": 292 | |
| }, | |
| { | |
| "epoch": 3.064, | |
| "grad_norm": 0.6427203508454588, | |
| "learning_rate": 2.275580581369276e-05, | |
| "loss": 0.4007, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4016428589820862, | |
| "step": 1915, | |
| "valid_targets_mean": 2186.6, | |
| "valid_targets_min": 670 | |
| }, | |
| { | |
| "epoch": 3.072, | |
| "grad_norm": 0.6009841223748552, | |
| "learning_rate": 2.2663579962945205e-05, | |
| "loss": 0.3836, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36916252970695496, | |
| "step": 1920, | |
| "valid_targets_mean": 2420.2, | |
| "valid_targets_min": 477 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "grad_norm": 0.5884916954322104, | |
| "learning_rate": 2.2571296414676503e-05, | |
| "loss": 0.3648, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35220831632614136, | |
| "step": 1925, | |
| "valid_targets_mean": 2553.4, | |
| "valid_targets_min": 865 | |
| }, | |
| { | |
| "epoch": 3.088, | |
| "grad_norm": 0.6147850470764658, | |
| "learning_rate": 2.2478957167900038e-05, | |
| "loss": 0.3778, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3545624911785126, | |
| "step": 1930, | |
| "valid_targets_mean": 2651.2, | |
| "valid_targets_min": 906 | |
| }, | |
| { | |
| "epoch": 3.096, | |
| "grad_norm": 0.654324522879676, | |
| "learning_rate": 2.23865642228357e-05, | |
| "loss": 0.3808, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4201942980289459, | |
| "step": 1935, | |
| "valid_targets_mean": 2273.0, | |
| "valid_targets_min": 989 | |
| }, | |
| { | |
| "epoch": 3.104, | |
| "grad_norm": 0.6486633604347383, | |
| "learning_rate": 2.2294119580866592e-05, | |
| "loss": 0.3807, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37265709042549133, | |
| "step": 1940, | |
| "valid_targets_mean": 2489.9, | |
| "valid_targets_min": 747 | |
| }, | |
| { | |
| "epoch": 3.112, | |
| "grad_norm": 0.6212204162828813, | |
| "learning_rate": 2.2201625244495646e-05, | |
| "loss": 0.372, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.386108934879303, | |
| "step": 1945, | |
| "valid_targets_mean": 2504.6, | |
| "valid_targets_min": 602 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "grad_norm": 0.6195173102616667, | |
| "learning_rate": 2.2109083217302242e-05, | |
| "loss": 0.3909, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3882253170013428, | |
| "step": 1950, | |
| "valid_targets_mean": 2608.1, | |
| "valid_targets_min": 1199 | |
| }, | |
| { | |
| "epoch": 3.128, | |
| "grad_norm": 0.601347688833568, | |
| "learning_rate": 2.201649550389885e-05, | |
| "loss": 0.3773, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37615832686424255, | |
| "step": 1955, | |
| "valid_targets_mean": 2454.6, | |
| "valid_targets_min": 812 | |
| }, | |
| { | |
| "epoch": 3.136, | |
| "grad_norm": 0.5855501844342232, | |
| "learning_rate": 2.1923864109887556e-05, | |
| "loss": 0.3892, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40102967619895935, | |
| "step": 1960, | |
| "valid_targets_mean": 2845.6, | |
| "valid_targets_min": 1626 | |
| }, | |
| { | |
| "epoch": 3.144, | |
| "grad_norm": 0.6462381753806768, | |
| "learning_rate": 2.1831191041816652e-05, | |
| "loss": 0.3798, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3896256685256958, | |
| "step": 1965, | |
| "valid_targets_mean": 2207.4, | |
| "valid_targets_min": 691 | |
| }, | |
| { | |
| "epoch": 3.152, | |
| "grad_norm": 0.6447232425326718, | |
| "learning_rate": 2.173847830713715e-05, | |
| "loss": 0.3921, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3892019987106323, | |
| "step": 1970, | |
| "valid_targets_mean": 2329.9, | |
| "valid_targets_min": 1223 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "grad_norm": 0.5537840504045047, | |
| "learning_rate": 2.1645727914159315e-05, | |
| "loss": 0.3646, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3495325446128845, | |
| "step": 1975, | |
| "valid_targets_mean": 2758.8, | |
| "valid_targets_min": 1192 | |
| }, | |
| { | |
| "epoch": 3.168, | |
| "grad_norm": 0.5569181058229623, | |
| "learning_rate": 2.1552941872009144e-05, | |
| "loss": 0.3859, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3637195825576782, | |
| "step": 1980, | |
| "valid_targets_mean": 2719.5, | |
| "valid_targets_min": 620 | |
| }, | |
| { | |
| "epoch": 3.176, | |
| "grad_norm": 0.6757754955399397, | |
| "learning_rate": 2.1460122190584868e-05, | |
| "loss": 0.4003, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44064268469810486, | |
| "step": 1985, | |
| "valid_targets_mean": 2298.9, | |
| "valid_targets_min": 1347 | |
| }, | |
| { | |
| "epoch": 3.184, | |
| "grad_norm": 0.6266172665957364, | |
| "learning_rate": 2.1367270880513377e-05, | |
| "loss": 0.3862, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3712347745895386, | |
| "step": 1990, | |
| "valid_targets_mean": 2372.4, | |
| "valid_targets_min": 678 | |
| }, | |
| { | |
| "epoch": 3.192, | |
| "grad_norm": 0.629095700387527, | |
| "learning_rate": 2.127438995310671e-05, | |
| "loss": 0.3919, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3881651759147644, | |
| "step": 1995, | |
| "valid_targets_mean": 2208.1, | |
| "valid_targets_min": 627 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "grad_norm": 0.6152198991611968, | |
| "learning_rate": 2.118148142031846e-05, | |
| "loss": 0.3856, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3625306785106659, | |
| "step": 2000, | |
| "valid_targets_mean": 2492.7, | |
| "valid_targets_min": 875 | |
| }, | |
| { | |
| "epoch": 3.208, | |
| "grad_norm": 0.6833342445465392, | |
| "learning_rate": 2.1088547294700182e-05, | |
| "loss": 0.3841, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4140799641609192, | |
| "step": 2005, | |
| "valid_targets_mean": 2277.3, | |
| "valid_targets_min": 701 | |
| }, | |
| { | |
| "epoch": 3.216, | |
| "grad_norm": 0.5798542496292155, | |
| "learning_rate": 2.0995589589357846e-05, | |
| "loss": 0.3796, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3835946321487427, | |
| "step": 2010, | |
| "valid_targets_mean": 2782.6, | |
| "valid_targets_min": 1224 | |
| }, | |
| { | |
| "epoch": 3.224, | |
| "grad_norm": 0.5664440967682081, | |
| "learning_rate": 2.0902610317908175e-05, | |
| "loss": 0.3621, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3766084313392639, | |
| "step": 2015, | |
| "valid_targets_mean": 2869.3, | |
| "valid_targets_min": 627 | |
| }, | |
| { | |
| "epoch": 3.232, | |
| "grad_norm": 0.6637622837999462, | |
| "learning_rate": 2.080961149443505e-05, | |
| "loss": 0.3888, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37104371190071106, | |
| "step": 2020, | |
| "valid_targets_mean": 2083.4, | |
| "valid_targets_min": 809 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "grad_norm": 0.7978545413525489, | |
| "learning_rate": 2.071659513344589e-05, | |
| "loss": 0.3606, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3885534703731537, | |
| "step": 2025, | |
| "valid_targets_mean": 2178.2, | |
| "valid_targets_min": 910 | |
| }, | |
| { | |
| "epoch": 3.248, | |
| "grad_norm": 0.6296550337826623, | |
| "learning_rate": 2.0623563249828e-05, | |
| "loss": 0.3943, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.385465532541275, | |
| "step": 2030, | |
| "valid_targets_mean": 2537.3, | |
| "valid_targets_min": 598 | |
| }, | |
| { | |
| "epoch": 3.2560000000000002, | |
| "grad_norm": 0.5934206085224188, | |
| "learning_rate": 2.053051785880492e-05, | |
| "loss": 0.3702, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3774966299533844, | |
| "step": 2035, | |
| "valid_targets_mean": 2659.2, | |
| "valid_targets_min": 1175 | |
| }, | |
| { | |
| "epoch": 3.2640000000000002, | |
| "grad_norm": 0.7167344747974587, | |
| "learning_rate": 2.0437460975892814e-05, | |
| "loss": 0.401, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40590858459472656, | |
| "step": 2040, | |
| "valid_targets_mean": 1961.8, | |
| "valid_targets_min": 777 | |
| }, | |
| { | |
| "epoch": 3.2720000000000002, | |
| "grad_norm": 0.6267710623077165, | |
| "learning_rate": 2.0344394616856736e-05, | |
| "loss": 0.384, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38169512152671814, | |
| "step": 2045, | |
| "valid_targets_mean": 2491.3, | |
| "valid_targets_min": 1233 | |
| }, | |
| { | |
| "epoch": 3.2800000000000002, | |
| "grad_norm": 0.5745339719311472, | |
| "learning_rate": 2.0251320797667056e-05, | |
| "loss": 0.3775, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34669169783592224, | |
| "step": 2050, | |
| "valid_targets_mean": 2608.9, | |
| "valid_targets_min": 817 | |
| }, | |
| { | |
| "epoch": 3.288, | |
| "grad_norm": 0.6140351382017055, | |
| "learning_rate": 2.01582415344557e-05, | |
| "loss": 0.3787, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3617323935031891, | |
| "step": 2055, | |
| "valid_targets_mean": 2331.2, | |
| "valid_targets_min": 637 | |
| }, | |
| { | |
| "epoch": 3.296, | |
| "grad_norm": 0.6083097608917135, | |
| "learning_rate": 2.006515884347255e-05, | |
| "loss": 0.379, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35356348752975464, | |
| "step": 2060, | |
| "valid_targets_mean": 2350.0, | |
| "valid_targets_min": 931 | |
| }, | |
| { | |
| "epoch": 3.304, | |
| "grad_norm": 0.6638832788004323, | |
| "learning_rate": 1.9972074741041712e-05, | |
| "loss": 0.3858, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36626502871513367, | |
| "step": 2065, | |
| "valid_targets_mean": 2050.3, | |
| "valid_targets_min": 538 | |
| }, | |
| { | |
| "epoch": 3.312, | |
| "grad_norm": 0.6787805881687811, | |
| "learning_rate": 1.9878991243517913e-05, | |
| "loss": 0.3932, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41017746925354004, | |
| "step": 2070, | |
| "valid_targets_mean": 2297.9, | |
| "valid_targets_min": 776 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "grad_norm": 0.6931763136350336, | |
| "learning_rate": 1.9785910367242712e-05, | |
| "loss": 0.3945, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4205699563026428, | |
| "step": 2075, | |
| "valid_targets_mean": 2180.5, | |
| "valid_targets_min": 580 | |
| }, | |
| { | |
| "epoch": 3.328, | |
| "grad_norm": 0.6574199956221226, | |
| "learning_rate": 1.969283412850094e-05, | |
| "loss": 0.3751, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41730862855911255, | |
| "step": 2080, | |
| "valid_targets_mean": 2249.2, | |
| "valid_targets_min": 887 | |
| }, | |
| { | |
| "epoch": 3.336, | |
| "grad_norm": 0.5603282902950534, | |
| "learning_rate": 1.959976454347696e-05, | |
| "loss": 0.3569, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32021045684814453, | |
| "step": 2085, | |
| "valid_targets_mean": 2726.4, | |
| "valid_targets_min": 874 | |
| }, | |
| { | |
| "epoch": 3.344, | |
| "grad_norm": 0.6385163168979912, | |
| "learning_rate": 1.950670362821098e-05, | |
| "loss": 0.3619, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3663329482078552, | |
| "step": 2090, | |
| "valid_targets_mean": 2306.5, | |
| "valid_targets_min": 883 | |
| }, | |
| { | |
| "epoch": 3.352, | |
| "grad_norm": 0.5837917390861946, | |
| "learning_rate": 1.9413653398555437e-05, | |
| "loss": 0.3772, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3824262320995331, | |
| "step": 2095, | |
| "valid_targets_mean": 2530.4, | |
| "valid_targets_min": 916 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "grad_norm": 0.6366463867100456, | |
| "learning_rate": 1.9320615870131282e-05, | |
| "loss": 0.3701, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3722764849662781, | |
| "step": 2100, | |
| "valid_targets_mean": 2311.1, | |
| "valid_targets_min": 615 | |
| }, | |
| { | |
| "epoch": 3.368, | |
| "grad_norm": 0.618642847487934, | |
| "learning_rate": 1.9227593058284343e-05, | |
| "loss": 0.3752, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3829469084739685, | |
| "step": 2105, | |
| "valid_targets_mean": 2443.9, | |
| "valid_targets_min": 747 | |
| }, | |
| { | |
| "epoch": 3.376, | |
| "grad_norm": 0.6056798347672363, | |
| "learning_rate": 1.9134586978041663e-05, | |
| "loss": 0.3838, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3649381995201111, | |
| "step": 2110, | |
| "valid_targets_mean": 2630.2, | |
| "valid_targets_min": 1193 | |
| }, | |
| { | |
| "epoch": 3.384, | |
| "grad_norm": 0.6808595547961468, | |
| "learning_rate": 1.9041599644067846e-05, | |
| "loss": 0.3869, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40938037633895874, | |
| "step": 2115, | |
| "valid_targets_mean": 1987.3, | |
| "valid_targets_min": 897 | |
| }, | |
| { | |
| "epoch": 3.392, | |
| "grad_norm": 0.5591783619891524, | |
| "learning_rate": 1.8948633070621433e-05, | |
| "loss": 0.3828, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3668754994869232, | |
| "step": 2120, | |
| "valid_targets_mean": 2930.8, | |
| "valid_targets_min": 1054 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "grad_norm": 0.5708695292911752, | |
| "learning_rate": 1.885568927151124e-05, | |
| "loss": 0.3869, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36800146102905273, | |
| "step": 2125, | |
| "valid_targets_mean": 2738.0, | |
| "valid_targets_min": 1031 | |
| }, | |
| { | |
| "epoch": 3.408, | |
| "grad_norm": 0.6410323180590372, | |
| "learning_rate": 1.8762770260052773e-05, | |
| "loss": 0.3758, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39123398065567017, | |
| "step": 2130, | |
| "valid_targets_mean": 2428.8, | |
| "valid_targets_min": 370 | |
| }, | |
| { | |
| "epoch": 3.416, | |
| "grad_norm": 0.5958482716495009, | |
| "learning_rate": 1.8669878049024575e-05, | |
| "loss": 0.3926, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37300702929496765, | |
| "step": 2135, | |
| "valid_targets_mean": 2350.7, | |
| "valid_targets_min": 576 | |
| }, | |
| { | |
| "epoch": 3.424, | |
| "grad_norm": 0.6622488113759839, | |
| "learning_rate": 1.857701465062467e-05, | |
| "loss": 0.4018, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4317435622215271, | |
| "step": 2140, | |
| "valid_targets_mean": 2397.9, | |
| "valid_targets_min": 1223 | |
| }, | |
| { | |
| "epoch": 3.432, | |
| "grad_norm": 0.5977111746898944, | |
| "learning_rate": 1.848418207642693e-05, | |
| "loss": 0.3771, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39127588272094727, | |
| "step": 2145, | |
| "valid_targets_mean": 2532.9, | |
| "valid_targets_min": 959 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "grad_norm": 0.6975469176420807, | |
| "learning_rate": 1.8391382337337548e-05, | |
| "loss": 0.3999, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.419955313205719, | |
| "step": 2150, | |
| "valid_targets_mean": 2115.2, | |
| "valid_targets_min": 786 | |
| }, | |
| { | |
| "epoch": 3.448, | |
| "grad_norm": 0.6151932097541486, | |
| "learning_rate": 1.829861744355144e-05, | |
| "loss": 0.3557, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3637232780456543, | |
| "step": 2155, | |
| "valid_targets_mean": 2578.8, | |
| "valid_targets_min": 1331 | |
| }, | |
| { | |
| "epoch": 3.456, | |
| "grad_norm": 0.6255190526192899, | |
| "learning_rate": 1.820588940450872e-05, | |
| "loss": 0.3856, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3912215828895569, | |
| "step": 2160, | |
| "valid_targets_mean": 2437.8, | |
| "valid_targets_min": 939 | |
| }, | |
| { | |
| "epoch": 3.464, | |
| "grad_norm": 0.629666509474719, | |
| "learning_rate": 1.8113200228851163e-05, | |
| "loss": 0.3914, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39584749937057495, | |
| "step": 2165, | |
| "valid_targets_mean": 2337.1, | |
| "valid_targets_min": 792 | |
| }, | |
| { | |
| "epoch": 3.472, | |
| "grad_norm": 0.6216477913146147, | |
| "learning_rate": 1.80205519243787e-05, | |
| "loss": 0.3657, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3581187427043915, | |
| "step": 2170, | |
| "valid_targets_mean": 2355.4, | |
| "valid_targets_min": 818 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "grad_norm": 0.6310163599034911, | |
| "learning_rate": 1.7927946498005934e-05, | |
| "loss": 0.3979, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3783031702041626, | |
| "step": 2175, | |
| "valid_targets_mean": 2337.3, | |
| "valid_targets_min": 950 | |
| }, | |
| { | |
| "epoch": 3.488, | |
| "grad_norm": 0.6238172083353898, | |
| "learning_rate": 1.7835385955718653e-05, | |
| "loss": 0.3915, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38771480321884155, | |
| "step": 2180, | |
| "valid_targets_mean": 2363.9, | |
| "valid_targets_min": 815 | |
| }, | |
| { | |
| "epoch": 3.496, | |
| "grad_norm": 0.6031878486237773, | |
| "learning_rate": 1.7742872302530366e-05, | |
| "loss": 0.3845, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3815397620201111, | |
| "step": 2185, | |
| "valid_targets_mean": 2484.9, | |
| "valid_targets_min": 1550 | |
| }, | |
| { | |
| "epoch": 3.504, | |
| "grad_norm": 0.6494767639468971, | |
| "learning_rate": 1.765040754243892e-05, | |
| "loss": 0.3865, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3746333122253418, | |
| "step": 2190, | |
| "valid_targets_mean": 2236.9, | |
| "valid_targets_min": 868 | |
| }, | |
| { | |
| "epoch": 3.512, | |
| "grad_norm": 0.6817120325400415, | |
| "learning_rate": 1.755799367838302e-05, | |
| "loss": 0.3788, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39564692974090576, | |
| "step": 2195, | |
| "valid_targets_mean": 2186.7, | |
| "valid_targets_min": 548 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "grad_norm": 0.6338802082931607, | |
| "learning_rate": 1.746563271219891e-05, | |
| "loss": 0.392, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4126970171928406, | |
| "step": 2200, | |
| "valid_targets_mean": 2585.1, | |
| "valid_targets_min": 707 | |
| }, | |
| { | |
| "epoch": 3.528, | |
| "grad_norm": 0.6416592297721261, | |
| "learning_rate": 1.7373326644576965e-05, | |
| "loss": 0.3872, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34282779693603516, | |
| "step": 2205, | |
| "valid_targets_mean": 2174.8, | |
| "valid_targets_min": 644 | |
| }, | |
| { | |
| "epoch": 3.536, | |
| "grad_norm": 0.6080200335692698, | |
| "learning_rate": 1.728107747501836e-05, | |
| "loss": 0.383, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37131014466285706, | |
| "step": 2210, | |
| "valid_targets_mean": 2337.9, | |
| "valid_targets_min": 1197 | |
| }, | |
| { | |
| "epoch": 3.544, | |
| "grad_norm": 0.5875911893293823, | |
| "learning_rate": 1.7188887201791785e-05, | |
| "loss": 0.3904, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38630616664886475, | |
| "step": 2215, | |
| "valid_targets_mean": 2732.9, | |
| "valid_targets_min": 626 | |
| }, | |
| { | |
| "epoch": 3.552, | |
| "grad_norm": 0.6679176162146531, | |
| "learning_rate": 1.7096757821890117e-05, | |
| "loss": 0.3899, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39532271027565, | |
| "step": 2220, | |
| "valid_targets_mean": 2232.2, | |
| "valid_targets_min": 1057 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "grad_norm": 0.6082886705221039, | |
| "learning_rate": 1.7004691330987196e-05, | |
| "loss": 0.4012, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38921183347702026, | |
| "step": 2225, | |
| "valid_targets_mean": 2391.8, | |
| "valid_targets_min": 506 | |
| }, | |
| { | |
| "epoch": 3.568, | |
| "grad_norm": 0.6520919498892896, | |
| "learning_rate": 1.691268972339458e-05, | |
| "loss": 0.3753, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40031343698501587, | |
| "step": 2230, | |
| "valid_targets_mean": 2362.3, | |
| "valid_targets_min": 589 | |
| }, | |
| { | |
| "epoch": 3.576, | |
| "grad_norm": 0.6381008434723833, | |
| "learning_rate": 1.6820754992018344e-05, | |
| "loss": 0.3912, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3827167749404907, | |
| "step": 2235, | |
| "valid_targets_mean": 2252.8, | |
| "valid_targets_min": 565 | |
| }, | |
| { | |
| "epoch": 3.584, | |
| "grad_norm": 0.6401934747434895, | |
| "learning_rate": 1.6728889128315932e-05, | |
| "loss": 0.3812, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37738150358200073, | |
| "step": 2240, | |
| "valid_targets_mean": 2236.8, | |
| "valid_targets_min": 545 | |
| }, | |
| { | |
| "epoch": 3.592, | |
| "grad_norm": 0.5616110823405466, | |
| "learning_rate": 1.663709412225297e-05, | |
| "loss": 0.3854, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3371823728084564, | |
| "step": 2245, | |
| "valid_targets_mean": 2436.0, | |
| "valid_targets_min": 537 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "grad_norm": 0.6235596557291808, | |
| "learning_rate": 1.654537196226022e-05, | |
| "loss": 0.3906, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3934670388698578, | |
| "step": 2250, | |
| "valid_targets_mean": 2573.6, | |
| "valid_targets_min": 585 | |
| }, | |
| { | |
| "epoch": 3.608, | |
| "grad_norm": 0.635368993888004, | |
| "learning_rate": 1.6453724635190455e-05, | |
| "loss": 0.382, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3701242208480835, | |
| "step": 2255, | |
| "valid_targets_mean": 2438.3, | |
| "valid_targets_min": 619 | |
| }, | |
| { | |
| "epoch": 3.616, | |
| "grad_norm": 0.6118501399881906, | |
| "learning_rate": 1.6362154126275467e-05, | |
| "loss": 0.3831, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3612810969352722, | |
| "step": 2260, | |
| "valid_targets_mean": 2340.6, | |
| "valid_targets_min": 848 | |
| }, | |
| { | |
| "epoch": 3.624, | |
| "grad_norm": 0.6125092689411699, | |
| "learning_rate": 1.6270662419083018e-05, | |
| "loss": 0.3812, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3900102972984314, | |
| "step": 2265, | |
| "valid_targets_mean": 2715.5, | |
| "valid_targets_min": 668 | |
| }, | |
| { | |
| "epoch": 3.632, | |
| "grad_norm": 0.5991814329834635, | |
| "learning_rate": 1.617925149547391e-05, | |
| "loss": 0.3798, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3785431683063507, | |
| "step": 2270, | |
| "valid_targets_mean": 2787.3, | |
| "valid_targets_min": 674 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "grad_norm": 0.649363117474134, | |
| "learning_rate": 1.608792333555904e-05, | |
| "loss": 0.3819, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39676743745803833, | |
| "step": 2275, | |
| "valid_targets_mean": 2309.2, | |
| "valid_targets_min": 973 | |
| }, | |
| { | |
| "epoch": 3.648, | |
| "grad_norm": 0.7434860302130719, | |
| "learning_rate": 1.5996679917656492e-05, | |
| "loss": 0.3788, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35800135135650635, | |
| "step": 2280, | |
| "valid_targets_mean": 2369.3, | |
| "valid_targets_min": 822 | |
| }, | |
| { | |
| "epoch": 3.656, | |
| "grad_norm": 0.6824484447511117, | |
| "learning_rate": 1.5905523218248723e-05, | |
| "loss": 0.3958, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4209508001804352, | |
| "step": 2285, | |
| "valid_targets_mean": 2079.3, | |
| "valid_targets_min": 605 | |
| }, | |
| { | |
| "epoch": 3.664, | |
| "grad_norm": 0.6571213066551697, | |
| "learning_rate": 1.5814455211939698e-05, | |
| "loss": 0.3844, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38445529341697693, | |
| "step": 2290, | |
| "valid_targets_mean": 2158.9, | |
| "valid_targets_min": 645 | |
| }, | |
| { | |
| "epoch": 3.672, | |
| "grad_norm": 0.6698625366113996, | |
| "learning_rate": 1.5723477871412168e-05, | |
| "loss": 0.382, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37412458658218384, | |
| "step": 2295, | |
| "valid_targets_mean": 2017.5, | |
| "valid_targets_min": 645 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "grad_norm": 0.5843315979866865, | |
| "learning_rate": 1.56325931673849e-05, | |
| "loss": 0.3732, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3598048985004425, | |
| "step": 2300, | |
| "valid_targets_mean": 2778.2, | |
| "valid_targets_min": 669 | |
| }, | |
| { | |
| "epoch": 3.6879999999999997, | |
| "grad_norm": 0.6520409980765114, | |
| "learning_rate": 1.5541803068569993e-05, | |
| "loss": 0.3908, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40478581190109253, | |
| "step": 2305, | |
| "valid_targets_mean": 2315.8, | |
| "valid_targets_min": 779 | |
| }, | |
| { | |
| "epoch": 3.6959999999999997, | |
| "grad_norm": 0.7037994821981977, | |
| "learning_rate": 1.5451109541630275e-05, | |
| "loss": 0.3886, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4001522660255432, | |
| "step": 2310, | |
| "valid_targets_mean": 2318.7, | |
| "valid_targets_min": 1207 | |
| }, | |
| { | |
| "epoch": 3.7039999999999997, | |
| "grad_norm": 0.6534218246018931, | |
| "learning_rate": 1.536051455113663e-05, | |
| "loss": 0.3845, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39225444197654724, | |
| "step": 2315, | |
| "valid_targets_mean": 2325.8, | |
| "valid_targets_min": 679 | |
| }, | |
| { | |
| "epoch": 3.7119999999999997, | |
| "grad_norm": 0.7316026004311299, | |
| "learning_rate": 1.527002005952551e-05, | |
| "loss": 0.3759, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41108351945877075, | |
| "step": 2320, | |
| "valid_targets_mean": 1964.9, | |
| "valid_targets_min": 869 | |
| }, | |
| { | |
| "epoch": 3.7199999999999998, | |
| "grad_norm": 0.6992511851816486, | |
| "learning_rate": 1.5179628027056373e-05, | |
| "loss": 0.3918, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3997647762298584, | |
| "step": 2325, | |
| "valid_targets_mean": 2162.9, | |
| "valid_targets_min": 626 | |
| }, | |
| { | |
| "epoch": 3.7279999999999998, | |
| "grad_norm": 0.7600622230811375, | |
| "learning_rate": 1.5089340411769257e-05, | |
| "loss": 0.3879, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39846283197402954, | |
| "step": 2330, | |
| "valid_targets_mean": 1911.8, | |
| "valid_targets_min": 601 | |
| }, | |
| { | |
| "epoch": 3.7359999999999998, | |
| "grad_norm": 0.6310808640241407, | |
| "learning_rate": 1.499915916944236e-05, | |
| "loss": 0.3901, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3807406425476074, | |
| "step": 2335, | |
| "valid_targets_mean": 2493.9, | |
| "valid_targets_min": 731 | |
| }, | |
| { | |
| "epoch": 3.7439999999999998, | |
| "grad_norm": 0.6385737145727371, | |
| "learning_rate": 1.490908625354964e-05, | |
| "loss": 0.3892, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3948187232017517, | |
| "step": 2340, | |
| "valid_targets_mean": 2656.6, | |
| "valid_targets_min": 642 | |
| }, | |
| { | |
| "epoch": 3.752, | |
| "grad_norm": 0.6214858448550059, | |
| "learning_rate": 1.4819123615218556e-05, | |
| "loss": 0.3878, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39337706565856934, | |
| "step": 2345, | |
| "valid_targets_mean": 2589.9, | |
| "valid_targets_min": 581 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "grad_norm": 0.6047668156096683, | |
| "learning_rate": 1.472927320318775e-05, | |
| "loss": 0.3588, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3817969262599945, | |
| "step": 2350, | |
| "valid_targets_mean": 2547.9, | |
| "valid_targets_min": 387 | |
| }, | |
| { | |
| "epoch": 3.768, | |
| "grad_norm": 0.6856277125915268, | |
| "learning_rate": 1.4639536963764878e-05, | |
| "loss": 0.3828, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4414476454257965, | |
| "step": 2355, | |
| "valid_targets_mean": 2318.7, | |
| "valid_targets_min": 627 | |
| }, | |
| { | |
| "epoch": 3.776, | |
| "grad_norm": 0.6333198424314916, | |
| "learning_rate": 1.4549916840784409e-05, | |
| "loss": 0.3879, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39172789454460144, | |
| "step": 2360, | |
| "valid_targets_mean": 2391.9, | |
| "valid_targets_min": 693 | |
| }, | |
| { | |
| "epoch": 3.784, | |
| "grad_norm": 0.6259500970317692, | |
| "learning_rate": 1.4460414775565555e-05, | |
| "loss": 0.368, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34704911708831787, | |
| "step": 2365, | |
| "valid_targets_mean": 2448.9, | |
| "valid_targets_min": 891 | |
| }, | |
| { | |
| "epoch": 3.792, | |
| "grad_norm": 0.7594578101901227, | |
| "learning_rate": 1.43710327068702e-05, | |
| "loss": 0.3796, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45061349868774414, | |
| "step": 2370, | |
| "valid_targets_mean": 1818.4, | |
| "valid_targets_min": 685 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "grad_norm": 0.6490031536890803, | |
| "learning_rate": 1.4281772570860897e-05, | |
| "loss": 0.3716, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3952520787715912, | |
| "step": 2375, | |
| "valid_targets_mean": 2224.6, | |
| "valid_targets_min": 740 | |
| }, | |
| { | |
| "epoch": 3.808, | |
| "grad_norm": 0.6955497814955105, | |
| "learning_rate": 1.4192636301058952e-05, | |
| "loss": 0.38, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41568028926849365, | |
| "step": 2380, | |
| "valid_targets_mean": 2112.8, | |
| "valid_targets_min": 968 | |
| }, | |
| { | |
| "epoch": 3.816, | |
| "grad_norm": 0.5275242338157292, | |
| "learning_rate": 1.4103625828302508e-05, | |
| "loss": 0.3841, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34493115544319153, | |
| "step": 2385, | |
| "valid_targets_mean": 2959.6, | |
| "valid_targets_min": 1957 | |
| }, | |
| { | |
| "epoch": 3.824, | |
| "grad_norm": 0.6859861254603057, | |
| "learning_rate": 1.4014743080704743e-05, | |
| "loss": 0.3844, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43106839060783386, | |
| "step": 2390, | |
| "valid_targets_mean": 2140.1, | |
| "valid_targets_min": 507 | |
| }, | |
| { | |
| "epoch": 3.832, | |
| "grad_norm": 0.6306309015014626, | |
| "learning_rate": 1.3925989983612118e-05, | |
| "loss": 0.3795, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3861228823661804, | |
| "step": 2395, | |
| "valid_targets_mean": 2378.4, | |
| "valid_targets_min": 1191 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "grad_norm": 0.6393832606090019, | |
| "learning_rate": 1.383736845956261e-05, | |
| "loss": 0.3611, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34870201349258423, | |
| "step": 2400, | |
| "valid_targets_mean": 2230.6, | |
| "valid_targets_min": 608 | |
| }, | |
| { | |
| "epoch": 3.848, | |
| "grad_norm": 0.6433318760017774, | |
| "learning_rate": 1.3748880428244154e-05, | |
| "loss": 0.3895, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3842615783214569, | |
| "step": 2405, | |
| "valid_targets_mean": 2342.8, | |
| "valid_targets_min": 683 | |
| }, | |
| { | |
| "epoch": 3.856, | |
| "grad_norm": 0.6385186475370088, | |
| "learning_rate": 1.3660527806452965e-05, | |
| "loss": 0.3729, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3636973798274994, | |
| "step": 2410, | |
| "valid_targets_mean": 2115.1, | |
| "valid_targets_min": 629 | |
| }, | |
| { | |
| "epoch": 3.864, | |
| "grad_norm": 0.6777060990479569, | |
| "learning_rate": 1.3572312508052118e-05, | |
| "loss": 0.4042, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4158111810684204, | |
| "step": 2415, | |
| "valid_targets_mean": 2290.8, | |
| "valid_targets_min": 484 | |
| }, | |
| { | |
| "epoch": 3.872, | |
| "grad_norm": 0.6234999879880668, | |
| "learning_rate": 1.3484236443929982e-05, | |
| "loss": 0.366, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37164247035980225, | |
| "step": 2420, | |
| "valid_targets_mean": 2397.4, | |
| "valid_targets_min": 677 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "grad_norm": 0.6500653023091283, | |
| "learning_rate": 1.3396301521958926e-05, | |
| "loss": 0.3986, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.406566321849823, | |
| "step": 2425, | |
| "valid_targets_mean": 2353.1, | |
| "valid_targets_min": 724 | |
| }, | |
| { | |
| "epoch": 3.888, | |
| "grad_norm": 0.6199348747856589, | |
| "learning_rate": 1.3308509646953934e-05, | |
| "loss": 0.3767, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35002443194389343, | |
| "step": 2430, | |
| "valid_targets_mean": 2550.4, | |
| "valid_targets_min": 626 | |
| }, | |
| { | |
| "epoch": 3.896, | |
| "grad_norm": 0.6118256913662354, | |
| "learning_rate": 1.3220862720631349e-05, | |
| "loss": 0.3801, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37597745656967163, | |
| "step": 2435, | |
| "valid_targets_mean": 2473.8, | |
| "valid_targets_min": 800 | |
| }, | |
| { | |
| "epoch": 3.904, | |
| "grad_norm": 0.6165146793403535, | |
| "learning_rate": 1.3133362641567697e-05, | |
| "loss": 0.3904, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3653344511985779, | |
| "step": 2440, | |
| "valid_targets_mean": 2391.1, | |
| "valid_targets_min": 397 | |
| }, | |
| { | |
| "epoch": 3.912, | |
| "grad_norm": 0.5984853631086248, | |
| "learning_rate": 1.3046011305158546e-05, | |
| "loss": 0.3813, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3555792570114136, | |
| "step": 2445, | |
| "valid_targets_mean": 2439.0, | |
| "valid_targets_min": 788 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "grad_norm": 0.6517963353353854, | |
| "learning_rate": 1.2958810603577456e-05, | |
| "loss": 0.3871, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38606372475624084, | |
| "step": 2450, | |
| "valid_targets_mean": 2505.0, | |
| "valid_targets_min": 654 | |
| }, | |
| { | |
| "epoch": 3.928, | |
| "grad_norm": 0.6204479452047876, | |
| "learning_rate": 1.2871762425734989e-05, | |
| "loss": 0.38, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3570050299167633, | |
| "step": 2455, | |
| "valid_targets_mean": 2287.1, | |
| "valid_targets_min": 484 | |
| }, | |
| { | |
| "epoch": 3.936, | |
| "grad_norm": 0.6468990589515311, | |
| "learning_rate": 1.278486865723779e-05, | |
| "loss": 0.3826, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.388399600982666, | |
| "step": 2460, | |
| "valid_targets_mean": 2331.8, | |
| "valid_targets_min": 699 | |
| }, | |
| { | |
| "epoch": 3.944, | |
| "grad_norm": 0.8669461873958879, | |
| "learning_rate": 1.269813118034775e-05, | |
| "loss": 0.3805, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39867132902145386, | |
| "step": 2465, | |
| "valid_targets_mean": 2261.4, | |
| "valid_targets_min": 689 | |
| }, | |
| { | |
| "epoch": 3.952, | |
| "grad_norm": 0.6915041689439219, | |
| "learning_rate": 1.2611551873941213e-05, | |
| "loss": 0.3949, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.421209454536438, | |
| "step": 2470, | |
| "valid_targets_mean": 2146.9, | |
| "valid_targets_min": 538 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "grad_norm": 0.5609071746113083, | |
| "learning_rate": 1.2525132613468309e-05, | |
| "loss": 0.4007, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3427595794200897, | |
| "step": 2475, | |
| "valid_targets_mean": 2587.0, | |
| "valid_targets_min": 711 | |
| }, | |
| { | |
| "epoch": 3.968, | |
| "grad_norm": 0.7370047486079004, | |
| "learning_rate": 1.2438875270912294e-05, | |
| "loss": 0.3862, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4031817317008972, | |
| "step": 2480, | |
| "valid_targets_mean": 1953.1, | |
| "valid_targets_min": 615 | |
| }, | |
| { | |
| "epoch": 3.976, | |
| "grad_norm": 0.5534754324867939, | |
| "learning_rate": 1.2352781714749016e-05, | |
| "loss": 0.3748, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35224318504333496, | |
| "step": 2485, | |
| "valid_targets_mean": 2763.1, | |
| "valid_targets_min": 846 | |
| }, | |
| { | |
| "epoch": 3.984, | |
| "grad_norm": 0.6023895294851896, | |
| "learning_rate": 1.2266853809906469e-05, | |
| "loss": 0.3817, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35587412118911743, | |
| "step": 2490, | |
| "valid_targets_mean": 2325.6, | |
| "valid_targets_min": 573 | |
| }, | |
| { | |
| "epoch": 3.992, | |
| "grad_norm": 0.5933142283195199, | |
| "learning_rate": 1.2181093417724317e-05, | |
| "loss": 0.375, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36003753542900085, | |
| "step": 2495, | |
| "valid_targets_mean": 2494.0, | |
| "valid_targets_min": 949 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.5988758088534348, | |
| "learning_rate": 1.2095502395913676e-05, | |
| "loss": 0.3703, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36336004734039307, | |
| "step": 2500, | |
| "valid_targets_mean": 2681.9, | |
| "valid_targets_min": 1168 | |
| }, | |
| { | |
| "epoch": 4.008, | |
| "grad_norm": 0.659987854618312, | |
| "learning_rate": 1.2010082598516775e-05, | |
| "loss": 0.3587, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3814525604248047, | |
| "step": 2505, | |
| "valid_targets_mean": 2159.9, | |
| "valid_targets_min": 686 | |
| }, | |
| { | |
| "epoch": 4.016, | |
| "grad_norm": 0.6373533465376138, | |
| "learning_rate": 1.1924835875866884e-05, | |
| "loss": 0.3691, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3539432883262634, | |
| "step": 2510, | |
| "valid_targets_mean": 2507.6, | |
| "valid_targets_min": 585 | |
| }, | |
| { | |
| "epoch": 4.024, | |
| "grad_norm": 0.6946756197147173, | |
| "learning_rate": 1.1839764074548145e-05, | |
| "loss": 0.3564, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35100963711738586, | |
| "step": 2515, | |
| "valid_targets_mean": 2131.2, | |
| "valid_targets_min": 560 | |
| }, | |
| { | |
| "epoch": 4.032, | |
| "grad_norm": 0.7462607180687901, | |
| "learning_rate": 1.1754869037355659e-05, | |
| "loss": 0.3708, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3982384502887726, | |
| "step": 2520, | |
| "valid_targets_mean": 1920.0, | |
| "valid_targets_min": 481 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "grad_norm": 0.7206423837270861, | |
| "learning_rate": 1.1670152603255504e-05, | |
| "loss": 0.3772, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4138847887516022, | |
| "step": 2525, | |
| "valid_targets_mean": 2038.5, | |
| "valid_targets_min": 718 | |
| }, | |
| { | |
| "epoch": 4.048, | |
| "grad_norm": 0.6951181641143632, | |
| "learning_rate": 1.1585616607344909e-05, | |
| "loss": 0.3671, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3845224380493164, | |
| "step": 2530, | |
| "valid_targets_mean": 2092.4, | |
| "valid_targets_min": 724 | |
| }, | |
| { | |
| "epoch": 4.056, | |
| "grad_norm": 0.679395145581085, | |
| "learning_rate": 1.1501262880812547e-05, | |
| "loss": 0.3619, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39052027463912964, | |
| "step": 2535, | |
| "valid_targets_mean": 2193.2, | |
| "valid_targets_min": 805 | |
| }, | |
| { | |
| "epoch": 4.064, | |
| "grad_norm": 0.5721838907114962, | |
| "learning_rate": 1.141709325089881e-05, | |
| "loss": 0.3583, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32457679510116577, | |
| "step": 2540, | |
| "valid_targets_mean": 2973.6, | |
| "valid_targets_min": 1569 | |
| }, | |
| { | |
| "epoch": 4.072, | |
| "grad_norm": 0.6338114426507533, | |
| "learning_rate": 1.1333109540856257e-05, | |
| "loss": 0.3688, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35262852907180786, | |
| "step": 2545, | |
| "valid_targets_mean": 2444.9, | |
| "valid_targets_min": 824 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "grad_norm": 0.6708275619659173, | |
| "learning_rate": 1.1249313569910143e-05, | |
| "loss": 0.3703, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3734751343727112, | |
| "step": 2550, | |
| "valid_targets_mean": 2353.8, | |
| "valid_targets_min": 716 | |
| }, | |
| { | |
| "epoch": 4.088, | |
| "grad_norm": 0.6378454319068008, | |
| "learning_rate": 1.1165707153218942e-05, | |
| "loss": 0.3572, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35157474875450134, | |
| "step": 2555, | |
| "valid_targets_mean": 2397.9, | |
| "valid_targets_min": 580 | |
| }, | |
| { | |
| "epoch": 4.096, | |
| "grad_norm": 0.6640784937904208, | |
| "learning_rate": 1.1082292101835121e-05, | |
| "loss": 0.3753, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3626524806022644, | |
| "step": 2560, | |
| "valid_targets_mean": 2538.4, | |
| "valid_targets_min": 877 | |
| }, | |
| { | |
| "epoch": 4.104, | |
| "grad_norm": 0.6217842243587747, | |
| "learning_rate": 1.099907022266582e-05, | |
| "loss": 0.3755, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36767810583114624, | |
| "step": 2565, | |
| "valid_targets_mean": 2459.2, | |
| "valid_targets_min": 743 | |
| }, | |
| { | |
| "epoch": 4.112, | |
| "grad_norm": 0.6808459480743844, | |
| "learning_rate": 1.0916043318433767e-05, | |
| "loss": 0.3597, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35825079679489136, | |
| "step": 2570, | |
| "valid_targets_mean": 2529.9, | |
| "valid_targets_min": 834 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "grad_norm": 0.6106311108516793, | |
| "learning_rate": 1.0833213187638203e-05, | |
| "loss": 0.3769, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3592633008956909, | |
| "step": 2575, | |
| "valid_targets_mean": 2422.1, | |
| "valid_targets_min": 1018 | |
| }, | |
| { | |
| "epoch": 4.128, | |
| "grad_norm": 0.6479267624633701, | |
| "learning_rate": 1.0750581624515957e-05, | |
| "loss": 0.3654, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37433257699012756, | |
| "step": 2580, | |
| "valid_targets_mean": 2348.7, | |
| "valid_targets_min": 814 | |
| }, | |
| { | |
| "epoch": 4.136, | |
| "grad_norm": 0.6691291039177731, | |
| "learning_rate": 1.0668150419002527e-05, | |
| "loss": 0.3672, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36849620938301086, | |
| "step": 2585, | |
| "valid_targets_mean": 2256.8, | |
| "valid_targets_min": 567 | |
| }, | |
| { | |
| "epoch": 4.144, | |
| "grad_norm": 0.6048408706080803, | |
| "learning_rate": 1.0585921356693349e-05, | |
| "loss": 0.3519, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33743125200271606, | |
| "step": 2590, | |
| "valid_targets_mean": 2755.6, | |
| "valid_targets_min": 992 | |
| }, | |
| { | |
| "epoch": 4.152, | |
| "grad_norm": 0.6278027947650242, | |
| "learning_rate": 1.0503896218805112e-05, | |
| "loss": 0.3547, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32869869470596313, | |
| "step": 2595, | |
| "valid_targets_mean": 2364.7, | |
| "valid_targets_min": 475 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "grad_norm": 0.6472405304733959, | |
| "learning_rate": 1.0422076782137155e-05, | |
| "loss": 0.3565, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34370821714401245, | |
| "step": 2600, | |
| "valid_targets_mean": 2329.3, | |
| "valid_targets_min": 650 | |
| }, | |
| { | |
| "epoch": 4.168, | |
| "grad_norm": 0.7061910322339049, | |
| "learning_rate": 1.0340464819032991e-05, | |
| "loss": 0.3632, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3962213099002838, | |
| "step": 2605, | |
| "valid_targets_mean": 2245.4, | |
| "valid_targets_min": 900 | |
| }, | |
| { | |
| "epoch": 4.176, | |
| "grad_norm": 0.6378995168442645, | |
| "learning_rate": 1.0259062097341911e-05, | |
| "loss": 0.3638, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3800092339515686, | |
| "step": 2610, | |
| "valid_targets_mean": 2487.3, | |
| "valid_targets_min": 1426 | |
| }, | |
| { | |
| "epoch": 4.184, | |
| "grad_norm": 0.7059748137437399, | |
| "learning_rate": 1.017787038038071e-05, | |
| "loss": 0.3631, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35289984941482544, | |
| "step": 2615, | |
| "valid_targets_mean": 1940.3, | |
| "valid_targets_min": 862 | |
| }, | |
| { | |
| "epoch": 4.192, | |
| "grad_norm": 0.6696113201692014, | |
| "learning_rate": 1.0096891426895476e-05, | |
| "loss": 0.3729, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3574759364128113, | |
| "step": 2620, | |
| "valid_targets_mean": 2335.2, | |
| "valid_targets_min": 672 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "grad_norm": 0.6869753483812769, | |
| "learning_rate": 1.0016126991023447e-05, | |
| "loss": 0.3671, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36144423484802246, | |
| "step": 2625, | |
| "valid_targets_mean": 2039.0, | |
| "valid_targets_min": 684 | |
| }, | |
| { | |
| "epoch": 4.208, | |
| "grad_norm": 0.6559234434297613, | |
| "learning_rate": 9.935578822255113e-06, | |
| "loss": 0.3639, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.361632764339447, | |
| "step": 2630, | |
| "valid_targets_mean": 2439.4, | |
| "valid_targets_min": 948 | |
| }, | |
| { | |
| "epoch": 4.216, | |
| "grad_norm": 0.6599510201021159, | |
| "learning_rate": 9.855248665396218e-06, | |
| "loss": 0.3844, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36938899755477905, | |
| "step": 2635, | |
| "valid_targets_mean": 2472.2, | |
| "valid_targets_min": 751 | |
| }, | |
| { | |
| "epoch": 4.224, | |
| "grad_norm": 0.7649708638971318, | |
| "learning_rate": 9.775138260530046e-06, | |
| "loss": 0.3652, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40082091093063354, | |
| "step": 2640, | |
| "valid_targets_mean": 1781.4, | |
| "valid_targets_min": 626 | |
| }, | |
| { | |
| "epoch": 4.232, | |
| "grad_norm": 0.6143245924456161, | |
| "learning_rate": 9.695249342979667e-06, | |
| "loss": 0.3429, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3415009379386902, | |
| "step": 2645, | |
| "valid_targets_mean": 2800.2, | |
| "valid_targets_min": 1405 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "grad_norm": 0.5846688132687459, | |
| "learning_rate": 9.615583643270371e-06, | |
| "loss": 0.3559, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32217174768447876, | |
| "step": 2650, | |
| "valid_targets_mean": 2645.8, | |
| "valid_targets_min": 948 | |
| }, | |
| { | |
| "epoch": 4.248, | |
| "grad_norm": 0.6603892872326103, | |
| "learning_rate": 9.536142887092208e-06, | |
| "loss": 0.3762, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.367895245552063, | |
| "step": 2655, | |
| "valid_targets_mean": 2176.4, | |
| "valid_targets_min": 638 | |
| }, | |
| { | |
| "epoch": 4.256, | |
| "grad_norm": 0.6203923641190354, | |
| "learning_rate": 9.456928795262552e-06, | |
| "loss": 0.3632, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3404765725135803, | |
| "step": 2660, | |
| "valid_targets_mean": 2438.1, | |
| "valid_targets_min": 891 | |
| }, | |
| { | |
| "epoch": 4.264, | |
| "grad_norm": 0.5972320780019671, | |
| "learning_rate": 9.377943083688873e-06, | |
| "loss": 0.359, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35350432991981506, | |
| "step": 2665, | |
| "valid_targets_mean": 2765.6, | |
| "valid_targets_min": 1749 | |
| }, | |
| { | |
| "epoch": 4.272, | |
| "grad_norm": 0.6514979793709264, | |
| "learning_rate": 9.29918746333153e-06, | |
| "loss": 0.3623, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.373460054397583, | |
| "step": 2670, | |
| "valid_targets_mean": 2471.8, | |
| "valid_targets_min": 1143 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "grad_norm": 0.5759073825154406, | |
| "learning_rate": 9.220663640166756e-06, | |
| "loss": 0.3321, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32004714012145996, | |
| "step": 2675, | |
| "valid_targets_mean": 2779.6, | |
| "valid_targets_min": 1575 | |
| }, | |
| { | |
| "epoch": 4.288, | |
| "grad_norm": 0.6467606600817037, | |
| "learning_rate": 9.142373315149655e-06, | |
| "loss": 0.3544, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34785300493240356, | |
| "step": 2680, | |
| "valid_targets_mean": 2261.9, | |
| "valid_targets_min": 735 | |
| }, | |
| { | |
| "epoch": 4.296, | |
| "grad_norm": 0.6203071379346831, | |
| "learning_rate": 9.064318184177373e-06, | |
| "loss": 0.3563, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34045273065567017, | |
| "step": 2685, | |
| "valid_targets_mean": 2397.2, | |
| "valid_targets_min": 714 | |
| }, | |
| { | |
| "epoch": 4.304, | |
| "grad_norm": 0.5913268860832173, | |
| "learning_rate": 8.986499938052396e-06, | |
| "loss": 0.3564, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33473074436187744, | |
| "step": 2690, | |
| "valid_targets_mean": 2817.1, | |
| "valid_targets_min": 816 | |
| }, | |
| { | |
| "epoch": 4.312, | |
| "grad_norm": 0.672376259320334, | |
| "learning_rate": 8.908920262445859e-06, | |
| "loss": 0.3752, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3796904683113098, | |
| "step": 2695, | |
| "valid_targets_mean": 2770.5, | |
| "valid_targets_min": 931 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "grad_norm": 0.676521253764822, | |
| "learning_rate": 8.831580837861082e-06, | |
| "loss": 0.3626, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3628620207309723, | |
| "step": 2700, | |
| "valid_targets_mean": 2069.5, | |
| "valid_targets_min": 825 | |
| }, | |
| { | |
| "epoch": 4.328, | |
| "grad_norm": 0.6437422459664721, | |
| "learning_rate": 8.754483339597166e-06, | |
| "loss": 0.3791, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3925166428089142, | |
| "step": 2705, | |
| "valid_targets_mean": 2486.3, | |
| "valid_targets_min": 850 | |
| }, | |
| { | |
| "epoch": 4.336, | |
| "grad_norm": 0.6908108744201664, | |
| "learning_rate": 8.677629437712665e-06, | |
| "loss": 0.3641, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3935179114341736, | |
| "step": 2710, | |
| "valid_targets_mean": 2178.8, | |
| "valid_targets_min": 576 | |
| }, | |
| { | |
| "epoch": 4.344, | |
| "grad_norm": 0.5759636363073697, | |
| "learning_rate": 8.601020796989467e-06, | |
| "loss": 0.3681, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3588644862174988, | |
| "step": 2715, | |
| "valid_targets_mean": 2973.3, | |
| "valid_targets_min": 1601 | |
| }, | |
| { | |
| "epoch": 4.352, | |
| "grad_norm": 0.7140271324248649, | |
| "learning_rate": 8.524659076896656e-06, | |
| "loss": 0.3752, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39689189195632935, | |
| "step": 2720, | |
| "valid_targets_mean": 2259.2, | |
| "valid_targets_min": 724 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "grad_norm": 0.6334622079660887, | |
| "learning_rate": 8.448545931554652e-06, | |
| "loss": 0.3649, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33897995948791504, | |
| "step": 2725, | |
| "valid_targets_mean": 2458.4, | |
| "valid_targets_min": 614 | |
| }, | |
| { | |
| "epoch": 4.368, | |
| "grad_norm": 0.771518737688984, | |
| "learning_rate": 8.372683009699307e-06, | |
| "loss": 0.3802, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40696775913238525, | |
| "step": 2730, | |
| "valid_targets_mean": 2110.1, | |
| "valid_targets_min": 842 | |
| }, | |
| { | |
| "epoch": 4.376, | |
| "grad_norm": 0.6565853512831313, | |
| "learning_rate": 8.297071954646248e-06, | |
| "loss": 0.3717, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34116989374160767, | |
| "step": 2735, | |
| "valid_targets_mean": 2366.1, | |
| "valid_targets_min": 894 | |
| }, | |
| { | |
| "epoch": 4.384, | |
| "grad_norm": 0.7160416008305666, | |
| "learning_rate": 8.22171440425523e-06, | |
| "loss": 0.3621, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3710840940475464, | |
| "step": 2740, | |
| "valid_targets_mean": 2159.7, | |
| "valid_targets_min": 576 | |
| }, | |
| { | |
| "epoch": 4.392, | |
| "grad_norm": 0.5817023955144996, | |
| "learning_rate": 8.146611990894683e-06, | |
| "loss": 0.3483, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32857975363731384, | |
| "step": 2745, | |
| "valid_targets_mean": 2690.8, | |
| "valid_targets_min": 1464 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "grad_norm": 0.7255898568089918, | |
| "learning_rate": 8.071766341406363e-06, | |
| "loss": 0.3671, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40352243185043335, | |
| "step": 2750, | |
| "valid_targets_mean": 2041.8, | |
| "valid_targets_min": 702 | |
| }, | |
| { | |
| "epoch": 4.408, | |
| "grad_norm": 0.5782232874230977, | |
| "learning_rate": 7.997179077070092e-06, | |
| "loss": 0.3474, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32014548778533936, | |
| "step": 2755, | |
| "valid_targets_mean": 2697.8, | |
| "valid_targets_min": 615 | |
| }, | |
| { | |
| "epoch": 4.416, | |
| "grad_norm": 0.6458589269530536, | |
| "learning_rate": 7.92285181356864e-06, | |
| "loss": 0.359, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37554532289505005, | |
| "step": 2760, | |
| "valid_targets_mean": 2480.9, | |
| "valid_targets_min": 951 | |
| }, | |
| { | |
| "epoch": 4.424, | |
| "grad_norm": 0.7021766881006695, | |
| "learning_rate": 7.848786160952726e-06, | |
| "loss": 0.3735, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38606834411621094, | |
| "step": 2765, | |
| "valid_targets_mean": 2181.6, | |
| "valid_targets_min": 957 | |
| }, | |
| { | |
| "epoch": 4.432, | |
| "grad_norm": 0.6274510241809619, | |
| "learning_rate": 7.77498372360617e-06, | |
| "loss": 0.372, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3752686083316803, | |
| "step": 2770, | |
| "valid_targets_mean": 2571.4, | |
| "valid_targets_min": 1247 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "grad_norm": 0.5787589565707946, | |
| "learning_rate": 7.701446100211095e-06, | |
| "loss": 0.3831, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33762532472610474, | |
| "step": 2775, | |
| "valid_targets_mean": 2774.3, | |
| "valid_targets_min": 870 | |
| }, | |
| { | |
| "epoch": 4.448, | |
| "grad_norm": 0.6479393506208262, | |
| "learning_rate": 7.628174883713322e-06, | |
| "loss": 0.3532, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3586277365684509, | |
| "step": 2780, | |
| "valid_targets_mean": 2451.2, | |
| "valid_targets_min": 1111 | |
| }, | |
| { | |
| "epoch": 4.456, | |
| "grad_norm": 0.7036621463124177, | |
| "learning_rate": 7.555171661287875e-06, | |
| "loss": 0.3826, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38187170028686523, | |
| "step": 2785, | |
| "valid_targets_mean": 2242.8, | |
| "valid_targets_min": 1165 | |
| }, | |
| { | |
| "epoch": 4.464, | |
| "grad_norm": 0.7404371740874729, | |
| "learning_rate": 7.482438014304567e-06, | |
| "loss": 0.3625, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3355005383491516, | |
| "step": 2790, | |
| "valid_targets_mean": 2598.3, | |
| "valid_targets_min": 496 | |
| }, | |
| { | |
| "epoch": 4.4719999999999995, | |
| "grad_norm": 0.5856491425898046, | |
| "learning_rate": 7.4099755182937685e-06, | |
| "loss": 0.3742, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35368943214416504, | |
| "step": 2795, | |
| "valid_targets_mean": 2671.3, | |
| "valid_targets_min": 716 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "grad_norm": 0.7412936316470807, | |
| "learning_rate": 7.337785742912289e-06, | |
| "loss": 0.3762, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38273218274116516, | |
| "step": 2800, | |
| "valid_targets_mean": 1994.7, | |
| "valid_targets_min": 714 | |
| }, | |
| { | |
| "epoch": 4.4879999999999995, | |
| "grad_norm": 0.6176390800611116, | |
| "learning_rate": 7.265870251909335e-06, | |
| "loss": 0.363, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33250662684440613, | |
| "step": 2805, | |
| "valid_targets_mean": 2466.8, | |
| "valid_targets_min": 548 | |
| }, | |
| { | |
| "epoch": 4.496, | |
| "grad_norm": 0.6081405178337042, | |
| "learning_rate": 7.194230603092697e-06, | |
| "loss": 0.3704, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33933448791503906, | |
| "step": 2810, | |
| "valid_targets_mean": 2663.7, | |
| "valid_targets_min": 942 | |
| }, | |
| { | |
| "epoch": 4.504, | |
| "grad_norm": 0.6614502083929048, | |
| "learning_rate": 7.122868348294927e-06, | |
| "loss": 0.3581, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34448274970054626, | |
| "step": 2815, | |
| "valid_targets_mean": 2118.9, | |
| "valid_targets_min": 396 | |
| }, | |
| { | |
| "epoch": 4.5120000000000005, | |
| "grad_norm": 0.6638253505646341, | |
| "learning_rate": 7.051785033339804e-06, | |
| "loss": 0.37, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37338095903396606, | |
| "step": 2820, | |
| "valid_targets_mean": 2290.4, | |
| "valid_targets_min": 484 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "grad_norm": 0.6637334464053732, | |
| "learning_rate": 6.980982198008785e-06, | |
| "loss": 0.3612, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3786088824272156, | |
| "step": 2825, | |
| "valid_targets_mean": 2202.2, | |
| "valid_targets_min": 1287 | |
| }, | |
| { | |
| "epoch": 4.5280000000000005, | |
| "grad_norm": 0.6350340466502454, | |
| "learning_rate": 6.910461376007704e-06, | |
| "loss": 0.3754, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3658716678619385, | |
| "step": 2830, | |
| "valid_targets_mean": 2444.1, | |
| "valid_targets_min": 794 | |
| }, | |
| { | |
| "epoch": 4.536, | |
| "grad_norm": 0.6901975392443126, | |
| "learning_rate": 6.840224094933501e-06, | |
| "loss": 0.3783, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39317822456359863, | |
| "step": 2835, | |
| "valid_targets_mean": 2204.8, | |
| "valid_targets_min": 1276 | |
| }, | |
| { | |
| "epoch": 4.5440000000000005, | |
| "grad_norm": 0.6516495367984168, | |
| "learning_rate": 6.7702718762411505e-06, | |
| "loss": 0.3593, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.369767963886261, | |
| "step": 2840, | |
| "valid_targets_mean": 2537.8, | |
| "valid_targets_min": 1372 | |
| }, | |
| { | |
| "epoch": 4.552, | |
| "grad_norm": 0.6329226215152103, | |
| "learning_rate": 6.700606235210731e-06, | |
| "loss": 0.3512, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35097938776016235, | |
| "step": 2845, | |
| "valid_targets_mean": 2429.1, | |
| "valid_targets_min": 1097 | |
| }, | |
| { | |
| "epoch": 4.5600000000000005, | |
| "grad_norm": 0.7490965587225468, | |
| "learning_rate": 6.631228680914558e-06, | |
| "loss": 0.3881, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39271023869514465, | |
| "step": 2850, | |
| "valid_targets_mean": 2262.6, | |
| "valid_targets_min": 654 | |
| }, | |
| { | |
| "epoch": 4.568, | |
| "grad_norm": 0.6301883634712518, | |
| "learning_rate": 6.562140716184515e-06, | |
| "loss": 0.351, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3484507203102112, | |
| "step": 2855, | |
| "valid_targets_mean": 2629.8, | |
| "valid_targets_min": 945 | |
| }, | |
| { | |
| "epoch": 4.576, | |
| "grad_norm": 0.7290224675401246, | |
| "learning_rate": 6.493343837579511e-06, | |
| "loss": 0.3677, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38203197717666626, | |
| "step": 2860, | |
| "valid_targets_mean": 1948.4, | |
| "valid_targets_min": 663 | |
| }, | |
| { | |
| "epoch": 4.584, | |
| "grad_norm": 0.6646731504746918, | |
| "learning_rate": 6.424839535353045e-06, | |
| "loss": 0.3734, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3883858025074005, | |
| "step": 2865, | |
| "valid_targets_mean": 2372.2, | |
| "valid_targets_min": 729 | |
| }, | |
| { | |
| "epoch": 4.592, | |
| "grad_norm": 0.6719090701487639, | |
| "learning_rate": 6.356629293420926e-06, | |
| "loss": 0.3708, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40090903639793396, | |
| "step": 2870, | |
| "valid_targets_mean": 2423.6, | |
| "valid_targets_min": 681 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "grad_norm": 0.5921698249244614, | |
| "learning_rate": 6.28871458932913e-06, | |
| "loss": 0.3708, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3327995836734772, | |
| "step": 2875, | |
| "valid_targets_mean": 2652.3, | |
| "valid_targets_min": 683 | |
| }, | |
| { | |
| "epoch": 4.608, | |
| "grad_norm": 0.6236530614592126, | |
| "learning_rate": 6.2210968942218206e-06, | |
| "loss": 0.3673, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35564684867858887, | |
| "step": 2880, | |
| "valid_targets_mean": 2732.6, | |
| "valid_targets_min": 1033 | |
| }, | |
| { | |
| "epoch": 4.616, | |
| "grad_norm": 0.6938137658003077, | |
| "learning_rate": 6.153777672809438e-06, | |
| "loss": 0.3705, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3655781149864197, | |
| "step": 2885, | |
| "valid_targets_mean": 2043.9, | |
| "valid_targets_min": 1157 | |
| }, | |
| { | |
| "epoch": 4.624, | |
| "grad_norm": 0.6620370706764436, | |
| "learning_rate": 6.086758383336984e-06, | |
| "loss": 0.376, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38660040497779846, | |
| "step": 2890, | |
| "valid_targets_mean": 2444.5, | |
| "valid_targets_min": 710 | |
| }, | |
| { | |
| "epoch": 4.632, | |
| "grad_norm": 0.6810382275063193, | |
| "learning_rate": 6.0200404775524715e-06, | |
| "loss": 0.3696, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3601524829864502, | |
| "step": 2895, | |
| "valid_targets_mean": 2163.6, | |
| "valid_targets_min": 721 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "grad_norm": 0.5894159349358187, | |
| "learning_rate": 5.9536254006754155e-06, | |
| "loss": 0.3634, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3503599166870117, | |
| "step": 2900, | |
| "valid_targets_mean": 2725.6, | |
| "valid_targets_min": 417 | |
| }, | |
| { | |
| "epoch": 4.648, | |
| "grad_norm": 0.6496198594431579, | |
| "learning_rate": 5.887514591365593e-06, | |
| "loss": 0.3586, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3693704605102539, | |
| "step": 2905, | |
| "valid_targets_mean": 2608.0, | |
| "valid_targets_min": 1287 | |
| }, | |
| { | |
| "epoch": 4.656, | |
| "grad_norm": 0.6957437044968604, | |
| "learning_rate": 5.821709481691798e-06, | |
| "loss": 0.3611, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37342962622642517, | |
| "step": 2910, | |
| "valid_targets_mean": 2159.9, | |
| "valid_targets_min": 704 | |
| }, | |
| { | |
| "epoch": 4.664, | |
| "grad_norm": 0.6264433026530039, | |
| "learning_rate": 5.75621149710091e-06, | |
| "loss": 0.3665, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3909727931022644, | |
| "step": 2915, | |
| "valid_targets_mean": 2693.1, | |
| "valid_targets_min": 1187 | |
| }, | |
| { | |
| "epoch": 4.672, | |
| "grad_norm": 0.6244229147955063, | |
| "learning_rate": 5.691022056386961e-06, | |
| "loss": 0.363, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3780739903450012, | |
| "step": 2920, | |
| "valid_targets_mean": 2513.6, | |
| "valid_targets_min": 926 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "grad_norm": 0.664503211885057, | |
| "learning_rate": 5.6261425716604136e-06, | |
| "loss": 0.3697, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37827444076538086, | |
| "step": 2925, | |
| "valid_targets_mean": 2439.2, | |
| "valid_targets_min": 1019 | |
| }, | |
| { | |
| "epoch": 4.688, | |
| "grad_norm": 0.6098168284779346, | |
| "learning_rate": 5.56157444831757e-06, | |
| "loss": 0.3662, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3484095335006714, | |
| "step": 2930, | |
| "valid_targets_mean": 2730.8, | |
| "valid_targets_min": 1474 | |
| }, | |
| { | |
| "epoch": 4.696, | |
| "grad_norm": 0.6159163143967894, | |
| "learning_rate": 5.4973190850101334e-06, | |
| "loss": 0.3478, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33030563592910767, | |
| "step": 2935, | |
| "valid_targets_mean": 2694.9, | |
| "valid_targets_min": 692 | |
| }, | |
| { | |
| "epoch": 4.704, | |
| "grad_norm": 0.9275789347832416, | |
| "learning_rate": 5.433377873614925e-06, | |
| "loss": 0.372, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36244189739227295, | |
| "step": 2940, | |
| "valid_targets_mean": 2217.1, | |
| "valid_targets_min": 643 | |
| }, | |
| { | |
| "epoch": 4.712, | |
| "grad_norm": 0.6794374048872912, | |
| "learning_rate": 5.369752199203702e-06, | |
| "loss": 0.3752, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36548617482185364, | |
| "step": 2945, | |
| "valid_targets_mean": 2390.0, | |
| "valid_targets_min": 1189 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "grad_norm": 0.6481855604133522, | |
| "learning_rate": 5.306443440013171e-06, | |
| "loss": 0.3643, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36283496022224426, | |
| "step": 2950, | |
| "valid_targets_mean": 2627.6, | |
| "valid_targets_min": 1706 | |
| }, | |
| { | |
| "epoch": 4.728, | |
| "grad_norm": 0.6283996969199868, | |
| "learning_rate": 5.243452967415155e-06, | |
| "loss": 0.3517, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34274983406066895, | |
| "step": 2955, | |
| "valid_targets_mean": 2579.9, | |
| "valid_targets_min": 570 | |
| }, | |
| { | |
| "epoch": 4.736, | |
| "grad_norm": 0.6221382437380744, | |
| "learning_rate": 5.180782145886846e-06, | |
| "loss": 0.3735, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3483470678329468, | |
| "step": 2960, | |
| "valid_targets_mean": 2586.2, | |
| "valid_targets_min": 858 | |
| }, | |
| { | |
| "epoch": 4.744, | |
| "grad_norm": 0.6284889980995106, | |
| "learning_rate": 5.118432332981273e-06, | |
| "loss": 0.3877, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36552244424819946, | |
| "step": 2965, | |
| "valid_targets_mean": 2559.1, | |
| "valid_targets_min": 956 | |
| }, | |
| { | |
| "epoch": 4.752, | |
| "grad_norm": 0.6678458055329841, | |
| "learning_rate": 5.056404879297887e-06, | |
| "loss": 0.3507, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34571802616119385, | |
| "step": 2970, | |
| "valid_targets_mean": 2412.1, | |
| "valid_targets_min": 644 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "grad_norm": 0.6924660368387833, | |
| "learning_rate": 4.994701128453325e-06, | |
| "loss": 0.3617, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39678865671157837, | |
| "step": 2975, | |
| "valid_targets_mean": 2279.1, | |
| "valid_targets_min": 1016 | |
| }, | |
| { | |
| "epoch": 4.768, | |
| "grad_norm": 0.7809541899728214, | |
| "learning_rate": 4.933322417052269e-06, | |
| "loss": 0.3692, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3663102388381958, | |
| "step": 2980, | |
| "valid_targets_mean": 2255.8, | |
| "valid_targets_min": 637 | |
| }, | |
| { | |
| "epoch": 4.776, | |
| "grad_norm": 0.5751616696088279, | |
| "learning_rate": 4.8722700746585135e-06, | |
| "loss": 0.3519, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30202430486679077, | |
| "step": 2985, | |
| "valid_targets_mean": 2724.5, | |
| "valid_targets_min": 507 | |
| }, | |
| { | |
| "epoch": 4.784, | |
| "grad_norm": 0.7486775531571893, | |
| "learning_rate": 4.811545423766184e-06, | |
| "loss": 0.3779, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39271706342697144, | |
| "step": 2990, | |
| "valid_targets_mean": 1848.8, | |
| "valid_targets_min": 691 | |
| }, | |
| { | |
| "epoch": 4.792, | |
| "grad_norm": 0.6704024436269385, | |
| "learning_rate": 4.75114977977104e-06, | |
| "loss": 0.3689, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35405421257019043, | |
| "step": 2995, | |
| "valid_targets_mean": 2175.5, | |
| "valid_targets_min": 626 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "grad_norm": 0.6512417198114752, | |
| "learning_rate": 4.691084450942047e-06, | |
| "loss": 0.3515, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35529059171676636, | |
| "step": 3000, | |
| "valid_targets_mean": 2353.8, | |
| "valid_targets_min": 969 | |
| }, | |
| { | |
| "epoch": 4.808, | |
| "grad_norm": 0.6723125318736338, | |
| "learning_rate": 4.631350738392955e-06, | |
| "loss": 0.3671, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3929918110370636, | |
| "step": 3005, | |
| "valid_targets_mean": 2460.7, | |
| "valid_targets_min": 749 | |
| }, | |
| { | |
| "epoch": 4.816, | |
| "grad_norm": 0.6545635253453368, | |
| "learning_rate": 4.571949936054197e-06, | |
| "loss": 0.35, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3621842861175537, | |
| "step": 3010, | |
| "valid_targets_mean": 2419.5, | |
| "valid_targets_min": 1052 | |
| }, | |
| { | |
| "epoch": 4.824, | |
| "grad_norm": 0.8060013677435842, | |
| "learning_rate": 4.512883330644815e-06, | |
| "loss": 0.3816, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.370906263589859, | |
| "step": 3015, | |
| "valid_targets_mean": 2090.8, | |
| "valid_targets_min": 762 | |
| }, | |
| { | |
| "epoch": 4.832, | |
| "grad_norm": 0.785168334536275, | |
| "learning_rate": 4.454152201644591e-06, | |
| "loss": 0.3872, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.394753634929657, | |
| "step": 3020, | |
| "valid_targets_mean": 1937.4, | |
| "valid_targets_min": 601 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "grad_norm": 0.6806318836309488, | |
| "learning_rate": 4.395757821266333e-06, | |
| "loss": 0.3591, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36405467987060547, | |
| "step": 3025, | |
| "valid_targets_mean": 2313.8, | |
| "valid_targets_min": 612 | |
| }, | |
| { | |
| "epoch": 4.848, | |
| "grad_norm": 0.7058728895084995, | |
| "learning_rate": 4.337701454428322e-06, | |
| "loss": 0.3654, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40474677085876465, | |
| "step": 3030, | |
| "valid_targets_mean": 2222.8, | |
| "valid_targets_min": 669 | |
| }, | |
| { | |
| "epoch": 4.856, | |
| "grad_norm": 0.6290075033695414, | |
| "learning_rate": 4.279984358726925e-06, | |
| "loss": 0.3746, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3571993112564087, | |
| "step": 3035, | |
| "valid_targets_mean": 2527.0, | |
| "valid_targets_min": 871 | |
| }, | |
| { | |
| "epoch": 4.864, | |
| "grad_norm": 0.6736298727323675, | |
| "learning_rate": 4.2226077844093205e-06, | |
| "loss": 0.3614, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3846289813518524, | |
| "step": 3040, | |
| "valid_targets_mean": 2610.7, | |
| "valid_targets_min": 1160 | |
| }, | |
| { | |
| "epoch": 4.872, | |
| "grad_norm": 0.6196230514188242, | |
| "learning_rate": 4.165572974346435e-06, | |
| "loss": 0.355, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.354499489068985, | |
| "step": 3045, | |
| "valid_targets_mean": 2464.2, | |
| "valid_targets_min": 666 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "grad_norm": 0.6198778408734426, | |
| "learning_rate": 4.108881164006033e-06, | |
| "loss": 0.3563, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.347675085067749, | |
| "step": 3050, | |
| "valid_targets_mean": 2617.9, | |
| "valid_targets_min": 728 | |
| }, | |
| { | |
| "epoch": 4.888, | |
| "grad_norm": 0.6632240636048407, | |
| "learning_rate": 4.05253358142593e-06, | |
| "loss": 0.3588, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3727016746997833, | |
| "step": 3055, | |
| "valid_targets_mean": 2414.1, | |
| "valid_targets_min": 694 | |
| }, | |
| { | |
| "epoch": 4.896, | |
| "grad_norm": 0.6322639539099554, | |
| "learning_rate": 3.9965314471874035e-06, | |
| "loss": 0.3576, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3659431040287018, | |
| "step": 3060, | |
| "valid_targets_mean": 2477.1, | |
| "valid_targets_min": 924 | |
| }, | |
| { | |
| "epoch": 4.904, | |
| "grad_norm": 0.6736041760480485, | |
| "learning_rate": 3.940875974388749e-06, | |
| "loss": 0.3732, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34606072306632996, | |
| "step": 3065, | |
| "valid_targets_mean": 2141.9, | |
| "valid_targets_min": 442 | |
| }, | |
| { | |
| "epoch": 4.912, | |
| "grad_norm": 0.664830451092538, | |
| "learning_rate": 3.885568368619013e-06, | |
| "loss": 0.3626, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3804055452346802, | |
| "step": 3070, | |
| "valid_targets_mean": 2583.8, | |
| "valid_targets_min": 943 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "grad_norm": 0.6231360141274791, | |
| "learning_rate": 3.830609827931877e-06, | |
| "loss": 0.3684, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3745899796485901, | |
| "step": 3075, | |
| "valid_targets_mean": 2623.6, | |
| "valid_targets_min": 1711 | |
| }, | |
| { | |
| "epoch": 4.928, | |
| "grad_norm": 0.6575635297911601, | |
| "learning_rate": 3.7760015428196694e-06, | |
| "loss": 0.3598, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3489200174808502, | |
| "step": 3080, | |
| "valid_targets_mean": 2478.5, | |
| "valid_targets_min": 977 | |
| }, | |
| { | |
| "epoch": 4.936, | |
| "grad_norm": 0.6571068920250964, | |
| "learning_rate": 3.7217446961876413e-06, | |
| "loss": 0.3716, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36605334281921387, | |
| "step": 3085, | |
| "valid_targets_mean": 2625.0, | |
| "valid_targets_min": 1006 | |
| }, | |
| { | |
| "epoch": 4.944, | |
| "grad_norm": 0.6474105338007999, | |
| "learning_rate": 3.6678404633282826e-06, | |
| "loss": 0.37, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37614741921424866, | |
| "step": 3090, | |
| "valid_targets_mean": 2364.7, | |
| "valid_targets_min": 1127 | |
| }, | |
| { | |
| "epoch": 4.952, | |
| "grad_norm": 0.6552527098199069, | |
| "learning_rate": 3.6142900118959158e-06, | |
| "loss": 0.367, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37575167417526245, | |
| "step": 3095, | |
| "valid_targets_mean": 2360.6, | |
| "valid_targets_min": 685 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "grad_norm": 0.7523853247297415, | |
| "learning_rate": 3.561094501881339e-06, | |
| "loss": 0.3693, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3789512515068054, | |
| "step": 3100, | |
| "valid_targets_mean": 1838.3, | |
| "valid_targets_min": 578 | |
| }, | |
| { | |
| "epoch": 4.968, | |
| "grad_norm": 0.704782833537084, | |
| "learning_rate": 3.5082550855867693e-06, | |
| "loss": 0.3831, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3671078085899353, | |
| "step": 3105, | |
| "valid_targets_mean": 2336.1, | |
| "valid_targets_min": 1135 | |
| }, | |
| { | |
| "epoch": 4.976, | |
| "grad_norm": 1.0492112217903906, | |
| "learning_rate": 3.455772907600841e-06, | |
| "loss": 0.3604, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3749602437019348, | |
| "step": 3110, | |
| "valid_targets_mean": 2047.2, | |
| "valid_targets_min": 800 | |
| }, | |
| { | |
| "epoch": 4.984, | |
| "grad_norm": 0.7745551022530366, | |
| "learning_rate": 3.4036491047738075e-06, | |
| "loss": 0.3736, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37036004662513733, | |
| "step": 3115, | |
| "valid_targets_mean": 2268.4, | |
| "valid_targets_min": 985 | |
| }, | |
| { | |
| "epoch": 4.992, | |
| "grad_norm": 0.6234015250941393, | |
| "learning_rate": 3.351884806192933e-06, | |
| "loss": 0.3551, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35156768560409546, | |
| "step": 3120, | |
| "valid_targets_mean": 2443.3, | |
| "valid_targets_min": 654 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.672594729267636, | |
| "learning_rate": 3.3004811331580268e-06, | |
| "loss": 0.3609, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3615241050720215, | |
| "step": 3125, | |
| "valid_targets_mean": 2253.5, | |
| "valid_targets_min": 645 | |
| }, | |
| { | |
| "epoch": 5.008, | |
| "grad_norm": 0.6125356775058436, | |
| "learning_rate": 3.249439199157167e-06, | |
| "loss": 0.3575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3192368745803833, | |
| "step": 3130, | |
| "valid_targets_mean": 2929.1, | |
| "valid_targets_min": 1322 | |
| }, | |
| { | |
| "epoch": 5.016, | |
| "grad_norm": 0.6488989071092053, | |
| "learning_rate": 3.198760109842558e-06, | |
| "loss": 0.3805, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.344971626996994, | |
| "step": 3135, | |
| "valid_targets_mean": 2227.6, | |
| "valid_targets_min": 608 | |
| }, | |
| { | |
| "epoch": 5.024, | |
| "grad_norm": 0.6156448086036765, | |
| "learning_rate": 3.1484449630065894e-06, | |
| "loss": 0.3605, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3354613482952118, | |
| "step": 3140, | |
| "valid_targets_mean": 2545.4, | |
| "valid_targets_min": 796 | |
| }, | |
| { | |
| "epoch": 5.032, | |
| "grad_norm": 0.719895670722136, | |
| "learning_rate": 3.0984948485580736e-06, | |
| "loss": 0.3618, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3566667437553406, | |
| "step": 3145, | |
| "valid_targets_mean": 2109.3, | |
| "valid_targets_min": 763 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "grad_norm": 0.614850938651707, | |
| "learning_rate": 3.048910848498605e-06, | |
| "loss": 0.3521, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3528083264827728, | |
| "step": 3150, | |
| "valid_targets_mean": 2628.0, | |
| "valid_targets_min": 630 | |
| }, | |
| { | |
| "epoch": 5.048, | |
| "grad_norm": 0.7790162967880888, | |
| "learning_rate": 2.9996940368991477e-06, | |
| "loss": 0.3408, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3702874779701233, | |
| "step": 3155, | |
| "valid_targets_mean": 2356.4, | |
| "valid_targets_min": 868 | |
| }, | |
| { | |
| "epoch": 5.056, | |
| "grad_norm": 0.6660344390557594, | |
| "learning_rate": 2.9508454798767516e-06, | |
| "loss": 0.3633, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35248178243637085, | |
| "step": 3160, | |
| "valid_targets_mean": 2445.2, | |
| "valid_targets_min": 1027 | |
| }, | |
| { | |
| "epoch": 5.064, | |
| "grad_norm": 0.638861478860897, | |
| "learning_rate": 2.9023662355714766e-06, | |
| "loss": 0.3676, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3651914894580841, | |
| "step": 3165, | |
| "valid_targets_mean": 2576.9, | |
| "valid_targets_min": 841 | |
| }, | |
| { | |
| "epoch": 5.072, | |
| "grad_norm": 0.5988532849444926, | |
| "learning_rate": 2.8542573541234707e-06, | |
| "loss": 0.3577, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3110966682434082, | |
| "step": 3170, | |
| "valid_targets_mean": 3015.4, | |
| "valid_targets_min": 800 | |
| }, | |
| { | |
| "epoch": 5.08, | |
| "grad_norm": 0.6054079388440893, | |
| "learning_rate": 2.80651987765018e-06, | |
| "loss": 0.361, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3328086733818054, | |
| "step": 3175, | |
| "valid_targets_mean": 2782.4, | |
| "valid_targets_min": 701 | |
| }, | |
| { | |
| "epoch": 5.088, | |
| "grad_norm": 0.7338578008268197, | |
| "learning_rate": 2.759154840223843e-06, | |
| "loss": 0.3517, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36060547828674316, | |
| "step": 3180, | |
| "valid_targets_mean": 1945.8, | |
| "valid_targets_min": 719 | |
| }, | |
| { | |
| "epoch": 5.096, | |
| "grad_norm": 0.6685601535146233, | |
| "learning_rate": 2.7121632678490327e-06, | |
| "loss": 0.3578, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3761441111564636, | |
| "step": 3185, | |
| "valid_targets_mean": 2452.0, | |
| "valid_targets_min": 948 | |
| }, | |
| { | |
| "epoch": 5.104, | |
| "grad_norm": 0.7688881744820886, | |
| "learning_rate": 2.6655461784404768e-06, | |
| "loss": 0.3548, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37389665842056274, | |
| "step": 3190, | |
| "valid_targets_mean": 1935.4, | |
| "valid_targets_min": 648 | |
| }, | |
| { | |
| "epoch": 5.112, | |
| "grad_norm": 0.6905310599554686, | |
| "learning_rate": 2.6193045818009654e-06, | |
| "loss": 0.3617, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36182960867881775, | |
| "step": 3195, | |
| "valid_targets_mean": 2116.4, | |
| "valid_targets_min": 689 | |
| }, | |
| { | |
| "epoch": 5.12, | |
| "grad_norm": 0.6320635267289159, | |
| "learning_rate": 2.5734394795995066e-06, | |
| "loss": 0.3624, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36028727889060974, | |
| "step": 3200, | |
| "valid_targets_mean": 2626.6, | |
| "valid_targets_min": 1241 | |
| }, | |
| { | |
| "epoch": 5.128, | |
| "grad_norm": 0.6607522571681456, | |
| "learning_rate": 2.5279518653496272e-06, | |
| "loss": 0.3599, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35482197999954224, | |
| "step": 3205, | |
| "valid_targets_mean": 2485.2, | |
| "valid_targets_min": 669 | |
| }, | |
| { | |
| "epoch": 5.136, | |
| "grad_norm": 0.6696357677993036, | |
| "learning_rate": 2.4828427243878307e-06, | |
| "loss": 0.3585, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33945372700691223, | |
| "step": 3210, | |
| "valid_targets_mean": 2405.8, | |
| "valid_targets_min": 741 | |
| }, | |
| { | |
| "epoch": 5.144, | |
| "grad_norm": 0.7047794366612183, | |
| "learning_rate": 2.4381130338522762e-06, | |
| "loss": 0.361, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37273550033569336, | |
| "step": 3215, | |
| "valid_targets_mean": 2244.8, | |
| "valid_targets_min": 1182 | |
| }, | |
| { | |
| "epoch": 5.152, | |
| "grad_norm": 0.6564416259617804, | |
| "learning_rate": 2.393763762661596e-06, | |
| "loss": 0.3526, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3642486035823822, | |
| "step": 3220, | |
| "valid_targets_mean": 2322.0, | |
| "valid_targets_min": 1177 | |
| }, | |
| { | |
| "epoch": 5.16, | |
| "grad_norm": 0.6181350427372484, | |
| "learning_rate": 2.349795871493925e-06, | |
| "loss": 0.3512, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33632394671440125, | |
| "step": 3225, | |
| "valid_targets_mean": 2635.9, | |
| "valid_targets_min": 525 | |
| }, | |
| { | |
| "epoch": 5.168, | |
| "grad_norm": 0.6406934295103374, | |
| "learning_rate": 2.3062103127660686e-06, | |
| "loss": 0.366, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36425334215164185, | |
| "step": 3230, | |
| "valid_targets_mean": 2557.0, | |
| "valid_targets_min": 1307 | |
| }, | |
| { | |
| "epoch": 5.176, | |
| "grad_norm": 0.6165343586266914, | |
| "learning_rate": 2.2630080306128833e-06, | |
| "loss": 0.3646, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32171159982681274, | |
| "step": 3235, | |
| "valid_targets_mean": 2622.6, | |
| "valid_targets_min": 981 | |
| }, | |
| { | |
| "epoch": 5.184, | |
| "grad_norm": 0.6892880210277768, | |
| "learning_rate": 2.2201899608668365e-06, | |
| "loss": 0.3599, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3722003400325775, | |
| "step": 3240, | |
| "valid_targets_mean": 2352.9, | |
| "valid_targets_min": 538 | |
| }, | |
| { | |
| "epoch": 5.192, | |
| "grad_norm": 0.6791780801679136, | |
| "learning_rate": 2.1777570310377084e-06, | |
| "loss": 0.3464, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3598274290561676, | |
| "step": 3245, | |
| "valid_targets_mean": 2443.2, | |
| "valid_targets_min": 662 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "grad_norm": 0.6806863637030199, | |
| "learning_rate": 2.1357101602925323e-06, | |
| "loss": 0.3593, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35902658104896545, | |
| "step": 3250, | |
| "valid_targets_mean": 2207.4, | |
| "valid_targets_min": 617 | |
| }, | |
| { | |
| "epoch": 5.208, | |
| "grad_norm": 0.7316316341641499, | |
| "learning_rate": 2.0940502594356427e-06, | |
| "loss": 0.3789, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37861448526382446, | |
| "step": 3255, | |
| "valid_targets_mean": 2340.1, | |
| "valid_targets_min": 597 | |
| }, | |
| { | |
| "epoch": 5.216, | |
| "grad_norm": 0.7882908631165876, | |
| "learning_rate": 2.052778230888994e-06, | |
| "loss": 0.3552, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.386216402053833, | |
| "step": 3260, | |
| "valid_targets_mean": 1980.9, | |
| "valid_targets_min": 539 | |
| }, | |
| { | |
| "epoch": 5.224, | |
| "grad_norm": 0.6483668844257544, | |
| "learning_rate": 2.0118949686725786e-06, | |
| "loss": 0.3598, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3690330386161804, | |
| "step": 3265, | |
| "valid_targets_mean": 2679.3, | |
| "valid_targets_min": 1039 | |
| }, | |
| { | |
| "epoch": 5.232, | |
| "grad_norm": 0.6787408597631184, | |
| "learning_rate": 1.971401358385072e-06, | |
| "loss": 0.3596, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3550987243652344, | |
| "step": 3270, | |
| "valid_targets_mean": 2274.2, | |
| "valid_targets_min": 712 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "grad_norm": 0.7569513313755559, | |
| "learning_rate": 1.9312982771846435e-06, | |
| "loss": 0.3634, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4026515483856201, | |
| "step": 3275, | |
| "valid_targets_mean": 1947.2, | |
| "valid_targets_min": 684 | |
| }, | |
| { | |
| "epoch": 5.248, | |
| "grad_norm": 0.7060321478005621, | |
| "learning_rate": 1.8915865937699652e-06, | |
| "loss": 0.3553, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3544864058494568, | |
| "step": 3280, | |
| "valid_targets_mean": 2002.1, | |
| "valid_targets_min": 710 | |
| }, | |
| { | |
| "epoch": 5.256, | |
| "grad_norm": 0.5588815510320256, | |
| "learning_rate": 1.8522671683613946e-06, | |
| "loss": 0.351, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3070131540298462, | |
| "step": 3285, | |
| "valid_targets_mean": 2963.2, | |
| "valid_targets_min": 661 | |
| }, | |
| { | |
| "epoch": 5.264, | |
| "grad_norm": 0.7256277028506886, | |
| "learning_rate": 1.8133408526823283e-06, | |
| "loss": 0.3662, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38448530435562134, | |
| "step": 3290, | |
| "valid_targets_mean": 2194.4, | |
| "valid_targets_min": 698 | |
| }, | |
| { | |
| "epoch": 5.272, | |
| "grad_norm": 0.6579211667621234, | |
| "learning_rate": 1.7748084899407558e-06, | |
| "loss": 0.356, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3459991216659546, | |
| "step": 3295, | |
| "valid_targets_mean": 2373.1, | |
| "valid_targets_min": 787 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "grad_norm": 0.6820255470210079, | |
| "learning_rate": 1.7366709148110118e-06, | |
| "loss": 0.3605, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36668434739112854, | |
| "step": 3300, | |
| "valid_targets_mean": 2471.6, | |
| "valid_targets_min": 1419 | |
| }, | |
| { | |
| "epoch": 5.288, | |
| "grad_norm": 0.7082042585335822, | |
| "learning_rate": 1.698928953415675e-06, | |
| "loss": 0.3618, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3762280344963074, | |
| "step": 3305, | |
| "valid_targets_mean": 2216.0, | |
| "valid_targets_min": 774 | |
| }, | |
| { | |
| "epoch": 5.296, | |
| "grad_norm": 0.777879323614007, | |
| "learning_rate": 1.6615834233076756e-06, | |
| "loss": 0.3431, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39311477541923523, | |
| "step": 3310, | |
| "valid_targets_mean": 1915.2, | |
| "valid_targets_min": 622 | |
| }, | |
| { | |
| "epoch": 5.304, | |
| "grad_norm": 0.667224442472971, | |
| "learning_rate": 1.6246351334525944e-06, | |
| "loss": 0.3606, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35362792015075684, | |
| "step": 3315, | |
| "valid_targets_mean": 2345.8, | |
| "valid_targets_min": 604 | |
| }, | |
| { | |
| "epoch": 5.312, | |
| "grad_norm": 0.6727530950210006, | |
| "learning_rate": 1.5880848842111362e-06, | |
| "loss": 0.3642, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3399882912635803, | |
| "step": 3320, | |
| "valid_targets_mean": 2341.9, | |
| "valid_targets_min": 592 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "grad_norm": 0.6695401723963949, | |
| "learning_rate": 1.5519334673218023e-06, | |
| "loss": 0.3546, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35643112659454346, | |
| "step": 3325, | |
| "valid_targets_mean": 2406.8, | |
| "valid_targets_min": 746 | |
| }, | |
| { | |
| "epoch": 5.328, | |
| "grad_norm": 0.6513531697775429, | |
| "learning_rate": 1.5161816658837002e-06, | |
| "loss": 0.3472, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3052400052547455, | |
| "step": 3330, | |
| "valid_targets_mean": 2647.8, | |
| "valid_targets_min": 622 | |
| }, | |
| { | |
| "epoch": 5.336, | |
| "grad_norm": 0.7766971137424113, | |
| "learning_rate": 1.4808302543396423e-06, | |
| "loss": 0.3492, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37005147337913513, | |
| "step": 3335, | |
| "valid_targets_mean": 2038.7, | |
| "valid_targets_min": 634 | |
| }, | |
| { | |
| "epoch": 5.344, | |
| "grad_norm": 0.7026028192917165, | |
| "learning_rate": 1.445879998459314e-06, | |
| "loss": 0.3479, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34184879064559937, | |
| "step": 3340, | |
| "valid_targets_mean": 2478.4, | |
| "valid_targets_min": 590 | |
| }, | |
| { | |
| "epoch": 5.352, | |
| "grad_norm": 0.644580097683027, | |
| "learning_rate": 1.4113316553227296e-06, | |
| "loss": 0.3467, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3371330499649048, | |
| "step": 3345, | |
| "valid_targets_mean": 2657.6, | |
| "valid_targets_min": 1401 | |
| }, | |
| { | |
| "epoch": 5.36, | |
| "grad_norm": 0.705372710572543, | |
| "learning_rate": 1.3771859733037896e-06, | |
| "loss": 0.3447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3721950054168701, | |
| "step": 3350, | |
| "valid_targets_mean": 2235.4, | |
| "valid_targets_min": 753 | |
| }, | |
| { | |
| "epoch": 5.368, | |
| "grad_norm": 0.7041286066838948, | |
| "learning_rate": 1.3434436920541072e-06, | |
| "loss": 0.3619, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36865848302841187, | |
| "step": 3355, | |
| "valid_targets_mean": 2148.9, | |
| "valid_targets_min": 992 | |
| }, | |
| { | |
| "epoch": 5.376, | |
| "grad_norm": 0.6598927398730162, | |
| "learning_rate": 1.3101055424869768e-06, | |
| "loss": 0.3491, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36260682344436646, | |
| "step": 3360, | |
| "valid_targets_mean": 2419.1, | |
| "valid_targets_min": 906 | |
| }, | |
| { | |
| "epoch": 5.384, | |
| "grad_norm": 0.7943332000313966, | |
| "learning_rate": 1.2771722467615266e-06, | |
| "loss": 0.3666, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.384222149848938, | |
| "step": 3365, | |
| "valid_targets_mean": 1807.8, | |
| "valid_targets_min": 511 | |
| }, | |
| { | |
| "epoch": 5.392, | |
| "grad_norm": 0.6283790962992163, | |
| "learning_rate": 1.2446445182670818e-06, | |
| "loss": 0.3855, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3594076633453369, | |
| "step": 3370, | |
| "valid_targets_mean": 2752.9, | |
| "valid_targets_min": 715 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "grad_norm": 0.6308888988033152, | |
| "learning_rate": 1.21252306160772e-06, | |
| "loss": 0.3579, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3309343457221985, | |
| "step": 3375, | |
| "valid_targets_mean": 2615.4, | |
| "valid_targets_min": 1060 | |
| }, | |
| { | |
| "epoch": 5.408, | |
| "grad_norm": 0.6212982070573271, | |
| "learning_rate": 1.1808085725870088e-06, | |
| "loss": 0.357, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3432900011539459, | |
| "step": 3380, | |
| "valid_targets_mean": 2605.8, | |
| "valid_targets_min": 1208 | |
| }, | |
| { | |
| "epoch": 5.416, | |
| "grad_norm": 0.6325306117951953, | |
| "learning_rate": 1.1495017381929197e-06, | |
| "loss": 0.3568, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34718793630599976, | |
| "step": 3385, | |
| "valid_targets_mean": 2651.6, | |
| "valid_targets_min": 1221 | |
| }, | |
| { | |
| "epoch": 5.424, | |
| "grad_norm": 0.6551884609744554, | |
| "learning_rate": 1.1186032365829514e-06, | |
| "loss": 0.3524, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3209971487522125, | |
| "step": 3390, | |
| "valid_targets_mean": 2383.6, | |
| "valid_targets_min": 830 | |
| }, | |
| { | |
| "epoch": 5.432, | |
| "grad_norm": 0.6861395147132316, | |
| "learning_rate": 1.088113737069456e-06, | |
| "loss": 0.3616, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3504214286804199, | |
| "step": 3395, | |
| "valid_targets_mean": 2286.2, | |
| "valid_targets_min": 627 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "grad_norm": 0.7142301912993754, | |
| "learning_rate": 1.0580339001051153e-06, | |
| "loss": 0.3658, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4048931300640106, | |
| "step": 3400, | |
| "valid_targets_mean": 2164.6, | |
| "valid_targets_min": 1069 | |
| }, | |
| { | |
| "epoch": 5.448, | |
| "grad_norm": 0.6852209256299653, | |
| "learning_rate": 1.0283643772686535e-06, | |
| "loss": 0.3476, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3515816926956177, | |
| "step": 3405, | |
| "valid_targets_mean": 2186.8, | |
| "valid_targets_min": 535 | |
| }, | |
| { | |
| "epoch": 5.456, | |
| "grad_norm": 0.7376221866961876, | |
| "learning_rate": 9.991058112507113e-07, | |
| "loss": 0.3505, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40202945470809937, | |
| "step": 3410, | |
| "valid_targets_mean": 2289.1, | |
| "valid_targets_min": 779 | |
| }, | |
| { | |
| "epoch": 5.464, | |
| "grad_norm": 0.6144769599130704, | |
| "learning_rate": 9.702588358399345e-07, | |
| "loss": 0.3583, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3659561276435852, | |
| "step": 3415, | |
| "valid_targets_mean": 2851.7, | |
| "valid_targets_min": 847 | |
| }, | |
| { | |
| "epoch": 5.4719999999999995, | |
| "grad_norm": 0.6817618706864722, | |
| "learning_rate": 9.418240759092434e-07, | |
| "loss": 0.3637, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3654489517211914, | |
| "step": 3420, | |
| "valid_targets_mean": 2237.9, | |
| "valid_targets_min": 652 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "grad_norm": 0.6663119774773926, | |
| "learning_rate": 9.138021474022763e-07, | |
| "loss": 0.3598, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37460604310035706, | |
| "step": 3425, | |
| "valid_targets_mean": 2327.5, | |
| "valid_targets_min": 620 | |
| }, | |
| { | |
| "epoch": 5.4879999999999995, | |
| "grad_norm": 0.6716553399135632, | |
| "learning_rate": 8.861936573200825e-07, | |
| "loss": 0.363, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3525908589363098, | |
| "step": 3430, | |
| "valid_targets_mean": 2574.8, | |
| "valid_targets_min": 1182 | |
| }, | |
| { | |
| "epoch": 5.496, | |
| "grad_norm": 0.6735504443339394, | |
| "learning_rate": 8.58999203707942e-07, | |
| "loss": 0.3678, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38089731335639954, | |
| "step": 3435, | |
| "valid_targets_mean": 2493.9, | |
| "valid_targets_min": 883 | |
| }, | |
| { | |
| "epoch": 5.504, | |
| "grad_norm": 0.663972336719367, | |
| "learning_rate": 8.322193756424401e-07, | |
| "loss": 0.3465, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34215810894966125, | |
| "step": 3440, | |
| "valid_targets_mean": 2357.2, | |
| "valid_targets_min": 812 | |
| }, | |
| { | |
| "epoch": 5.5120000000000005, | |
| "grad_norm": 0.6192143246660865, | |
| "learning_rate": 8.058547532186667e-07, | |
| "loss": 0.3542, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3521275520324707, | |
| "step": 3445, | |
| "valid_targets_mean": 2753.1, | |
| "valid_targets_min": 1714 | |
| }, | |
| { | |
| "epoch": 5.52, | |
| "grad_norm": 0.6953540634211431, | |
| "learning_rate": 7.799059075376991e-07, | |
| "loss": 0.3623, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3627205193042755, | |
| "step": 3450, | |
| "valid_targets_mean": 2349.4, | |
| "valid_targets_min": 544 | |
| }, | |
| { | |
| "epoch": 5.5280000000000005, | |
| "grad_norm": 0.6802926606310232, | |
| "learning_rate": 7.54373400694195e-07, | |
| "loss": 0.3546, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3609620928764343, | |
| "step": 3455, | |
| "valid_targets_mean": 2452.8, | |
| "valid_targets_min": 782 | |
| }, | |
| { | |
| "epoch": 5.536, | |
| "grad_norm": 0.6634915525384147, | |
| "learning_rate": 7.292577857642302e-07, | |
| "loss": 0.357, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3651014566421509, | |
| "step": 3460, | |
| "valid_targets_mean": 2629.6, | |
| "valid_targets_min": 761 | |
| }, | |
| { | |
| "epoch": 5.5440000000000005, | |
| "grad_norm": 0.63417551257205, | |
| "learning_rate": 7.045596067933158e-07, | |
| "loss": 0.3587, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32456403970718384, | |
| "step": 3465, | |
| "valid_targets_mean": 2452.6, | |
| "valid_targets_min": 915 | |
| }, | |
| { | |
| "epoch": 5.552, | |
| "grad_norm": 0.5803948039648736, | |
| "learning_rate": 6.80279398784609e-07, | |
| "loss": 0.3381, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31592857837677, | |
| "step": 3470, | |
| "valid_targets_mean": 2696.3, | |
| "valid_targets_min": 1633 | |
| }, | |
| { | |
| "epoch": 5.5600000000000005, | |
| "grad_norm": 0.6857358254181065, | |
| "learning_rate": 6.56417687687343e-07, | |
| "loss": 0.3387, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34818774461746216, | |
| "step": 3475, | |
| "valid_targets_mean": 2154.4, | |
| "valid_targets_min": 548 | |
| }, | |
| { | |
| "epoch": 5.568, | |
| "grad_norm": 0.6581872551853069, | |
| "learning_rate": 6.329749903854066e-07, | |
| "loss": 0.3421, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3282102942466736, | |
| "step": 3480, | |
| "valid_targets_mean": 2525.1, | |
| "valid_targets_min": 747 | |
| }, | |
| { | |
| "epoch": 5.576, | |
| "grad_norm": 0.7037981685088222, | |
| "learning_rate": 6.099518146861628e-07, | |
| "loss": 0.3565, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34629759192466736, | |
| "step": 3485, | |
| "valid_targets_mean": 2143.9, | |
| "valid_targets_min": 948 | |
| }, | |
| { | |
| "epoch": 5.584, | |
| "grad_norm": 0.6977519908519918, | |
| "learning_rate": 5.873486593094546e-07, | |
| "loss": 0.3591, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3663643002510071, | |
| "step": 3490, | |
| "valid_targets_mean": 2090.2, | |
| "valid_targets_min": 631 | |
| }, | |
| { | |
| "epoch": 5.592, | |
| "grad_norm": 0.612283944786415, | |
| "learning_rate": 5.651660138767834e-07, | |
| "loss": 0.3576, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3275211453437805, | |
| "step": 3495, | |
| "valid_targets_mean": 2624.4, | |
| "valid_targets_min": 698 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "grad_norm": 0.751829222915938, | |
| "learning_rate": 5.434043589007232e-07, | |
| "loss": 0.3413, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3469979763031006, | |
| "step": 3500, | |
| "valid_targets_mean": 2564.5, | |
| "valid_targets_min": 924 | |
| }, | |
| { | |
| "epoch": 5.608, | |
| "grad_norm": 0.7027860247672669, | |
| "learning_rate": 5.220641657744963e-07, | |
| "loss": 0.3619, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36408111453056335, | |
| "step": 3505, | |
| "valid_targets_mean": 2274.8, | |
| "valid_targets_min": 618 | |
| }, | |
| { | |
| "epoch": 5.616, | |
| "grad_norm": 0.6902304457311933, | |
| "learning_rate": 5.0114589676177e-07, | |
| "loss": 0.3622, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36030685901641846, | |
| "step": 3510, | |
| "valid_targets_mean": 2209.7, | |
| "valid_targets_min": 948 | |
| }, | |
| { | |
| "epoch": 5.624, | |
| "grad_norm": 0.6614672483667304, | |
| "learning_rate": 4.806500049866492e-07, | |
| "loss": 0.3462, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3510351777076721, | |
| "step": 3515, | |
| "valid_targets_mean": 2432.2, | |
| "valid_targets_min": 842 | |
| }, | |
| { | |
| "epoch": 5.632, | |
| "grad_norm": 0.6800892158996101, | |
| "learning_rate": 4.6057693442383756e-07, | |
| "loss": 0.3569, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3634949028491974, | |
| "step": 3520, | |
| "valid_targets_mean": 2441.5, | |
| "valid_targets_min": 720 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "grad_norm": 0.6769618683554501, | |
| "learning_rate": 4.409271198890519e-07, | |
| "loss": 0.3484, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37198689579963684, | |
| "step": 3525, | |
| "valid_targets_mean": 2369.9, | |
| "valid_targets_min": 1527 | |
| }, | |
| { | |
| "epoch": 5.648, | |
| "grad_norm": 0.666362195969312, | |
| "learning_rate": 4.217009870295763e-07, | |
| "loss": 0.3497, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3538084626197815, | |
| "step": 3530, | |
| "valid_targets_mean": 2409.4, | |
| "valid_targets_min": 1270 | |
| }, | |
| { | |
| "epoch": 5.656, | |
| "grad_norm": 0.6739195979135522, | |
| "learning_rate": 4.028989523150628e-07, | |
| "loss": 0.3516, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3414755165576935, | |
| "step": 3535, | |
| "valid_targets_mean": 2567.1, | |
| "valid_targets_min": 691 | |
| }, | |
| { | |
| "epoch": 5.664, | |
| "grad_norm": 0.7059314292395321, | |
| "learning_rate": 3.8452142302849216e-07, | |
| "loss": 0.3524, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3656729459762573, | |
| "step": 3540, | |
| "valid_targets_mean": 2145.8, | |
| "valid_targets_min": 699 | |
| }, | |
| { | |
| "epoch": 5.672, | |
| "grad_norm": 0.7327175619232137, | |
| "learning_rate": 3.665687972573606e-07, | |
| "loss": 0.3554, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33814215660095215, | |
| "step": 3545, | |
| "valid_targets_mean": 1918.9, | |
| "valid_targets_min": 706 | |
| }, | |
| { | |
| "epoch": 5.68, | |
| "grad_norm": 0.6698574435915802, | |
| "learning_rate": 3.4904146388506475e-07, | |
| "loss": 0.3644, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.371431827545166, | |
| "step": 3550, | |
| "valid_targets_mean": 2439.1, | |
| "valid_targets_min": 1297 | |
| }, | |
| { | |
| "epoch": 5.688, | |
| "grad_norm": 0.6426157118596137, | |
| "learning_rate": 3.319398025824572e-07, | |
| "loss": 0.3686, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36349034309387207, | |
| "step": 3555, | |
| "valid_targets_mean": 2658.6, | |
| "valid_targets_min": 833 | |
| }, | |
| { | |
| "epoch": 5.696, | |
| "grad_norm": 0.7173898048768296, | |
| "learning_rate": 3.152641837996373e-07, | |
| "loss": 0.3569, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3507586121559143, | |
| "step": 3560, | |
| "valid_targets_mean": 2054.9, | |
| "valid_targets_min": 876 | |
| }, | |
| { | |
| "epoch": 5.704, | |
| "grad_norm": 0.6703837137982286, | |
| "learning_rate": 2.990149687579247e-07, | |
| "loss": 0.3426, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3759458661079407, | |
| "step": 3565, | |
| "valid_targets_mean": 2434.6, | |
| "valid_targets_min": 1637 | |
| }, | |
| { | |
| "epoch": 5.712, | |
| "grad_norm": 0.6625498990103753, | |
| "learning_rate": 2.8319250944203625e-07, | |
| "loss": 0.3527, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31836962699890137, | |
| "step": 3570, | |
| "valid_targets_mean": 2958.1, | |
| "valid_targets_min": 1197 | |
| }, | |
| { | |
| "epoch": 5.72, | |
| "grad_norm": 0.655529717895589, | |
| "learning_rate": 2.677971485924502e-07, | |
| "loss": 0.36, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3503941297531128, | |
| "step": 3575, | |
| "valid_targets_mean": 2468.9, | |
| "valid_targets_min": 645 | |
| }, | |
| { | |
| "epoch": 5.728, | |
| "grad_norm": 0.6056215587391824, | |
| "learning_rate": 2.52829219697992e-07, | |
| "loss": 0.3554, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34623590111732483, | |
| "step": 3580, | |
| "valid_targets_mean": 2907.1, | |
| "valid_targets_min": 2005 | |
| }, | |
| { | |
| "epoch": 5.736, | |
| "grad_norm": 0.7004852586309464, | |
| "learning_rate": 2.3828904698861565e-07, | |
| "loss": 0.3497, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36775827407836914, | |
| "step": 3585, | |
| "valid_targets_mean": 2347.5, | |
| "valid_targets_min": 1409 | |
| }, | |
| { | |
| "epoch": 5.744, | |
| "grad_norm": 0.660691313353332, | |
| "learning_rate": 2.2417694542836489e-07, | |
| "loss": 0.3454, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3477632403373718, | |
| "step": 3590, | |
| "valid_targets_mean": 2542.6, | |
| "valid_targets_min": 762 | |
| }, | |
| { | |
| "epoch": 5.752, | |
| "grad_norm": 0.6722150282665975, | |
| "learning_rate": 2.104932207085586e-07, | |
| "loss": 0.3588, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3427378535270691, | |
| "step": 3595, | |
| "valid_targets_mean": 2306.7, | |
| "valid_targets_min": 591 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "grad_norm": 0.6774361071713108, | |
| "learning_rate": 1.97238169241174e-07, | |
| "loss": 0.3484, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3895723223686218, | |
| "step": 3600, | |
| "valid_targets_mean": 2637.4, | |
| "valid_targets_min": 1795 | |
| }, | |
| { | |
| "epoch": 5.768, | |
| "grad_norm": 0.6995990010581683, | |
| "learning_rate": 1.8441207815241613e-07, | |
| "loss": 0.3611, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36859995126724243, | |
| "step": 3605, | |
| "valid_targets_mean": 2243.5, | |
| "valid_targets_min": 683 | |
| }, | |
| { | |
| "epoch": 5.776, | |
| "grad_norm": 0.7097143380434422, | |
| "learning_rate": 1.720152252765095e-07, | |
| "loss": 0.3538, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3496978282928467, | |
| "step": 3610, | |
| "valid_targets_mean": 2155.9, | |
| "valid_targets_min": 651 | |
| }, | |
| { | |
| "epoch": 5.784, | |
| "grad_norm": 0.5728311009707366, | |
| "learning_rate": 1.600478791496629e-07, | |
| "loss": 0.3453, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3100730776786804, | |
| "step": 3615, | |
| "valid_targets_mean": 2915.6, | |
| "valid_targets_min": 583 | |
| }, | |
| { | |
| "epoch": 5.792, | |
| "grad_norm": 0.6428640732598566, | |
| "learning_rate": 1.4851029900427415e-07, | |
| "loss": 0.3608, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3315027356147766, | |
| "step": 3620, | |
| "valid_targets_mean": 2660.2, | |
| "valid_targets_min": 814 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "grad_norm": 0.687739684791235, | |
| "learning_rate": 1.3740273476329224e-07, | |
| "loss": 0.3461, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3281402289867401, | |
| "step": 3625, | |
| "valid_targets_mean": 2156.2, | |
| "valid_targets_min": 926 | |
| }, | |
| { | |
| "epoch": 5.808, | |
| "grad_norm": 0.7237593508562562, | |
| "learning_rate": 1.2672542703482616e-07, | |
| "loss": 0.3524, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3774512708187103, | |
| "step": 3630, | |
| "valid_targets_mean": 2193.0, | |
| "valid_targets_min": 1095 | |
| }, | |
| { | |
| "epoch": 5.816, | |
| "grad_norm": 0.7248363306939806, | |
| "learning_rate": 1.164786071069135e-07, | |
| "loss": 0.3639, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3838192820549011, | |
| "step": 3635, | |
| "valid_targets_mean": 2081.1, | |
| "valid_targets_min": 370 | |
| }, | |
| { | |
| "epoch": 5.824, | |
| "grad_norm": 0.8579885115423562, | |
| "learning_rate": 1.0666249694251785e-07, | |
| "loss": 0.3602, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3642842769622803, | |
| "step": 3640, | |
| "valid_targets_mean": 2525.1, | |
| "valid_targets_min": 1365 | |
| }, | |
| { | |
| "epoch": 5.832, | |
| "grad_norm": 0.6333435755510938, | |
| "learning_rate": 9.72773091747281e-08, | |
| "loss": 0.3441, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3298102021217346, | |
| "step": 3645, | |
| "valid_targets_mean": 2519.0, | |
| "valid_targets_min": 1529 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "grad_norm": 0.6865034564959145, | |
| "learning_rate": 8.832324710214002e-08, | |
| "loss": 0.3694, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36878472566604614, | |
| "step": 3650, | |
| "valid_targets_mean": 2381.9, | |
| "valid_targets_min": 818 | |
| }, | |
| { | |
| "epoch": 5.848, | |
| "grad_norm": 0.7161601334807214, | |
| "learning_rate": 7.980050468445744e-08, | |
| "loss": 0.3464, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3624423146247864, | |
| "step": 3655, | |
| "valid_targets_mean": 2251.8, | |
| "valid_targets_min": 741 | |
| }, | |
| { | |
| "epoch": 5.856, | |
| "grad_norm": 0.7718758347904282, | |
| "learning_rate": 7.170926653829347e-08, | |
| "loss": 0.3615, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39595723152160645, | |
| "step": 3660, | |
| "valid_targets_mean": 1961.1, | |
| "valid_targets_min": 751 | |
| }, | |
| { | |
| "epoch": 5.864, | |
| "grad_norm": 0.7028502040729387, | |
| "learning_rate": 6.404970793317145e-08, | |
| "loss": 0.3606, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3521386384963989, | |
| "step": 3665, | |
| "valid_targets_mean": 2169.5, | |
| "valid_targets_min": 813 | |
| }, | |
| { | |
| "epoch": 5.872, | |
| "grad_norm": 0.6710893596193402, | |
| "learning_rate": 5.682199478772133e-08, | |
| "loss": 0.3531, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3567180335521698, | |
| "step": 3670, | |
| "valid_targets_mean": 2343.6, | |
| "valid_targets_min": 982 | |
| }, | |
| { | |
| "epoch": 5.88, | |
| "grad_norm": 0.6064785483244445, | |
| "learning_rate": 5.0026283666093635e-08, | |
| "loss": 0.3714, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3486100435256958, | |
| "step": 3675, | |
| "valid_targets_mean": 2649.5, | |
| "valid_targets_min": 1453 | |
| }, | |
| { | |
| "epoch": 5.888, | |
| "grad_norm": 0.6260330281616615, | |
| "learning_rate": 4.366272177456665e-08, | |
| "loss": 0.3654, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3249626159667969, | |
| "step": 3680, | |
| "valid_targets_mean": 2491.5, | |
| "valid_targets_min": 900 | |
| }, | |
| { | |
| "epoch": 5.896, | |
| "grad_norm": 0.5699534851889925, | |
| "learning_rate": 3.773144695834674e-08, | |
| "loss": 0.3353, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3237248361110687, | |
| "step": 3685, | |
| "valid_targets_mean": 3058.4, | |
| "valid_targets_min": 592 | |
| }, | |
| { | |
| "epoch": 5.904, | |
| "grad_norm": 0.6438269114276662, | |
| "learning_rate": 3.223258769860405e-08, | |
| "loss": 0.3618, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35090720653533936, | |
| "step": 3690, | |
| "valid_targets_mean": 2574.9, | |
| "valid_targets_min": 1240 | |
| }, | |
| { | |
| "epoch": 5.912, | |
| "grad_norm": 0.6683366651264305, | |
| "learning_rate": 2.716626310966808e-08, | |
| "loss": 0.3451, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34934568405151367, | |
| "step": 3695, | |
| "valid_targets_mean": 2282.0, | |
| "valid_targets_min": 644 | |
| }, | |
| { | |
| "epoch": 5.92, | |
| "grad_norm": 0.7329792310074869, | |
| "learning_rate": 2.253258293645866e-08, | |
| "loss": 0.3572, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3719084560871124, | |
| "step": 3700, | |
| "valid_targets_mean": 2026.5, | |
| "valid_targets_min": 697 | |
| }, | |
| { | |
| "epoch": 5.928, | |
| "grad_norm": 0.6920279565678626, | |
| "learning_rate": 1.8331647552110033e-08, | |
| "loss": 0.3535, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3860562741756439, | |
| "step": 3705, | |
| "valid_targets_mean": 2250.8, | |
| "valid_targets_min": 544 | |
| }, | |
| { | |
| "epoch": 5.936, | |
| "grad_norm": 0.6355753971291512, | |
| "learning_rate": 1.456354795578374e-08, | |
| "loss": 0.3528, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37555772066116333, | |
| "step": 3710, | |
| "valid_targets_mean": 2537.4, | |
| "valid_targets_min": 1178 | |
| }, | |
| { | |
| "epoch": 5.944, | |
| "grad_norm": 0.7374044889351038, | |
| "learning_rate": 1.1228365770714622e-08, | |
| "loss": 0.3565, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37717506289482117, | |
| "step": 3715, | |
| "valid_targets_mean": 2046.4, | |
| "valid_targets_min": 886 | |
| }, | |
| { | |
| "epoch": 5.952, | |
| "grad_norm": 0.6939052200451631, | |
| "learning_rate": 8.326173242432233e-09, | |
| "loss": 0.3657, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3981803059577942, | |
| "step": 3720, | |
| "valid_targets_mean": 2279.1, | |
| "valid_targets_min": 1434 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "grad_norm": 0.6916821499159527, | |
| "learning_rate": 5.857033237199883e-09, | |
| "loss": 0.3626, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36604171991348267, | |
| "step": 3725, | |
| "valid_targets_mean": 2196.9, | |
| "valid_targets_min": 567 | |
| }, | |
| { | |
| "epoch": 5.968, | |
| "grad_norm": 0.816549828786453, | |
| "learning_rate": 3.820999240644608e-09, | |
| "loss": 0.364, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3439550995826721, | |
| "step": 3730, | |
| "valid_targets_mean": 2641.3, | |
| "valid_targets_min": 677 | |
| }, | |
| { | |
| "epoch": 5.976, | |
| "grad_norm": 0.6998780250505308, | |
| "learning_rate": 2.2181153566158687e-09, | |
| "loss": 0.3538, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34991949796676636, | |
| "step": 3735, | |
| "valid_targets_mean": 2220.7, | |
| "valid_targets_min": 638 | |
| }, | |
| { | |
| "epoch": 5.984, | |
| "grad_norm": 0.6161285767403547, | |
| "learning_rate": 1.0484163062107755e-09, | |
| "loss": 0.3444, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32655662298202515, | |
| "step": 3740, | |
| "valid_targets_mean": 2840.2, | |
| "valid_targets_min": 745 | |
| }, | |
| { | |
| "epoch": 5.992, | |
| "grad_norm": 0.7847617955919047, | |
| "learning_rate": 3.11927427034675e-10, | |
| "loss": 0.3543, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4086189270019531, | |
| "step": 3745, | |
| "valid_targets_mean": 1846.4, | |
| "valid_targets_min": 674 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.7040092695127196, | |
| "learning_rate": 8.664672648261985e-12, | |
| "loss": 0.3618, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36595475673675537, | |
| "step": 3750, | |
| "valid_targets_mean": 2265.2, | |
| "valid_targets_min": 678 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36595475673675537, | |
| "step": 3750, | |
| "total_flos": 645391703212032.0, | |
| "train_loss": 0.40567625595728557, | |
| "train_runtime": 15381.7515, | |
| "train_samples_per_second": 3.9, | |
| "train_steps_per_second": 0.244, | |
| "valid_targets_mean": 2265.2, | |
| "valid_targets_min": 678 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 3750, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 6, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 645391703212032.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |