| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 8.0, | |
| "eval_steps": 500, | |
| "global_step": 1632, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02457002457002457, | |
| "grad_norm": 3.5317310427201827, | |
| "learning_rate": 4.4444444444444447e-05, | |
| "loss": 0.8461, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2849353849887848, | |
| "step": 5, | |
| "valid_targets_mean": 1490.1, | |
| "valid_targets_min": 971 | |
| }, | |
| { | |
| "epoch": 0.04914004914004914, | |
| "grad_norm": 1.202219130090371, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4291, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2022167444229126, | |
| "step": 10, | |
| "valid_targets_mean": 1436.6, | |
| "valid_targets_min": 853 | |
| }, | |
| { | |
| "epoch": 0.07371007371007371, | |
| "grad_norm": 0.8787295845972538, | |
| "learning_rate": 9.999765825644824e-05, | |
| "loss": 0.335, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1640535295009613, | |
| "step": 15, | |
| "valid_targets_mean": 1385.0, | |
| "valid_targets_min": 568 | |
| }, | |
| { | |
| "epoch": 0.09828009828009827, | |
| "grad_norm": 0.7147045925033308, | |
| "learning_rate": 9.999063324514344e-05, | |
| "loss": 0.3171, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15234357118606567, | |
| "step": 20, | |
| "valid_targets_mean": 1367.7, | |
| "valid_targets_min": 574 | |
| }, | |
| { | |
| "epoch": 0.12285012285012285, | |
| "grad_norm": 0.6497958620035181, | |
| "learning_rate": 9.99789256241166e-05, | |
| "loss": 0.285, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1415839046239853, | |
| "step": 25, | |
| "valid_targets_mean": 1316.3, | |
| "valid_targets_min": 783 | |
| }, | |
| { | |
| "epoch": 0.14742014742014742, | |
| "grad_norm": 0.5863502908920704, | |
| "learning_rate": 9.996253649001759e-05, | |
| "loss": 0.2762, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13542860746383667, | |
| "step": 30, | |
| "valid_targets_mean": 1348.1, | |
| "valid_targets_min": 693 | |
| }, | |
| { | |
| "epoch": 0.171990171990172, | |
| "grad_norm": 0.626750978778753, | |
| "learning_rate": 9.994146737801235e-05, | |
| "loss": 0.2814, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17099091410636902, | |
| "step": 35, | |
| "valid_targets_mean": 1599.6, | |
| "valid_targets_min": 768 | |
| }, | |
| { | |
| "epoch": 0.19656019656019655, | |
| "grad_norm": 0.5722325932529337, | |
| "learning_rate": 9.991572026163916e-05, | |
| "loss": 0.2653, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12752854824066162, | |
| "step": 40, | |
| "valid_targets_mean": 1151.1, | |
| "valid_targets_min": 712 | |
| }, | |
| { | |
| "epoch": 0.22113022113022113, | |
| "grad_norm": 0.8638136318346279, | |
| "learning_rate": 9.988529755262379e-05, | |
| "loss": 0.2596, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13361497223377228, | |
| "step": 45, | |
| "valid_targets_mean": 1315.8, | |
| "valid_targets_min": 649 | |
| }, | |
| { | |
| "epoch": 0.2457002457002457, | |
| "grad_norm": 1.8587267978944855, | |
| "learning_rate": 9.985020210065353e-05, | |
| "loss": 0.2706, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12216434627771378, | |
| "step": 50, | |
| "valid_targets_mean": 1322.4, | |
| "valid_targets_min": 940 | |
| }, | |
| { | |
| "epoch": 0.2702702702702703, | |
| "grad_norm": 0.547941104608437, | |
| "learning_rate": 9.981043719311034e-05, | |
| "loss": 0.2678, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14429020881652832, | |
| "step": 55, | |
| "valid_targets_mean": 1408.8, | |
| "valid_targets_min": 897 | |
| }, | |
| { | |
| "epoch": 0.29484029484029484, | |
| "grad_norm": 0.5986557075278855, | |
| "learning_rate": 9.976600655476283e-05, | |
| "loss": 0.2557, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14186960458755493, | |
| "step": 60, | |
| "valid_targets_mean": 1364.4, | |
| "valid_targets_min": 710 | |
| }, | |
| { | |
| "epoch": 0.3194103194103194, | |
| "grad_norm": 0.5897591965468908, | |
| "learning_rate": 9.971691434741742e-05, | |
| "loss": 0.2584, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11381076276302338, | |
| "step": 65, | |
| "valid_targets_mean": 1072.5, | |
| "valid_targets_min": 663 | |
| }, | |
| { | |
| "epoch": 0.343980343980344, | |
| "grad_norm": 0.5433018400955162, | |
| "learning_rate": 9.966316516952854e-05, | |
| "loss": 0.2649, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13451801240444183, | |
| "step": 70, | |
| "valid_targets_mean": 1287.7, | |
| "valid_targets_min": 637 | |
| }, | |
| { | |
| "epoch": 0.36855036855036855, | |
| "grad_norm": 0.47134946266510613, | |
| "learning_rate": 9.960476405576782e-05, | |
| "loss": 0.256, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12614522874355316, | |
| "step": 75, | |
| "valid_targets_mean": 1355.8, | |
| "valid_targets_min": 646 | |
| }, | |
| { | |
| "epoch": 0.3931203931203931, | |
| "grad_norm": 0.5009575392578494, | |
| "learning_rate": 9.95417164765525e-05, | |
| "loss": 0.2568, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11249350011348724, | |
| "step": 80, | |
| "valid_targets_mean": 1198.3, | |
| "valid_targets_min": 800 | |
| }, | |
| { | |
| "epoch": 0.4176904176904177, | |
| "grad_norm": 0.4964813124723332, | |
| "learning_rate": 9.947402833753307e-05, | |
| "loss": 0.2573, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13899488747119904, | |
| "step": 85, | |
| "valid_targets_mean": 1440.5, | |
| "valid_targets_min": 598 | |
| }, | |
| { | |
| "epoch": 0.44226044226044225, | |
| "grad_norm": 0.52664451404203, | |
| "learning_rate": 9.940170597904006e-05, | |
| "loss": 0.2607, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1289326548576355, | |
| "step": 90, | |
| "valid_targets_mean": 1178.6, | |
| "valid_targets_min": 721 | |
| }, | |
| { | |
| "epoch": 0.4668304668304668, | |
| "grad_norm": 0.47578073110132335, | |
| "learning_rate": 9.932475617549016e-05, | |
| "loss": 0.2513, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13900956511497498, | |
| "step": 95, | |
| "valid_targets_mean": 1329.9, | |
| "valid_targets_min": 619 | |
| }, | |
| { | |
| "epoch": 0.4914004914004914, | |
| "grad_norm": 0.4555196443562555, | |
| "learning_rate": 9.924318613475156e-05, | |
| "loss": 0.2424, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09756769239902496, | |
| "step": 100, | |
| "valid_targets_mean": 1299.4, | |
| "valid_targets_min": 618 | |
| }, | |
| { | |
| "epoch": 0.515970515970516, | |
| "grad_norm": 0.4403223130979152, | |
| "learning_rate": 9.915700349746898e-05, | |
| "loss": 0.253, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12914995849132538, | |
| "step": 105, | |
| "valid_targets_mean": 1420.3, | |
| "valid_targets_min": 861 | |
| }, | |
| { | |
| "epoch": 0.5405405405405406, | |
| "grad_norm": 0.45106359259219175, | |
| "learning_rate": 9.906621633634782e-05, | |
| "loss": 0.2423, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1240796148777008, | |
| "step": 110, | |
| "valid_targets_mean": 1370.2, | |
| "valid_targets_min": 783 | |
| }, | |
| { | |
| "epoch": 0.5651105651105651, | |
| "grad_norm": 0.48058617450569124, | |
| "learning_rate": 9.897083315539803e-05, | |
| "loss": 0.2469, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12699732184410095, | |
| "step": 115, | |
| "valid_targets_mean": 1312.5, | |
| "valid_targets_min": 809 | |
| }, | |
| { | |
| "epoch": 0.5896805896805897, | |
| "grad_norm": 0.4221194167388693, | |
| "learning_rate": 9.88708628891376e-05, | |
| "loss": 0.2463, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11394324898719788, | |
| "step": 120, | |
| "valid_targets_mean": 1403.4, | |
| "valid_targets_min": 631 | |
| }, | |
| { | |
| "epoch": 0.6142506142506142, | |
| "grad_norm": 0.4610537918913265, | |
| "learning_rate": 9.876631490175555e-05, | |
| "loss": 0.2502, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12918531894683838, | |
| "step": 125, | |
| "valid_targets_mean": 1384.2, | |
| "valid_targets_min": 643 | |
| }, | |
| { | |
| "epoch": 0.6388206388206388, | |
| "grad_norm": 0.42247998299685735, | |
| "learning_rate": 9.86571989862349e-05, | |
| "loss": 0.2502, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11708711087703705, | |
| "step": 130, | |
| "valid_targets_mean": 1212.7, | |
| "valid_targets_min": 577 | |
| }, | |
| { | |
| "epoch": 0.6633906633906634, | |
| "grad_norm": 0.42116965624763975, | |
| "learning_rate": 9.854352536343534e-05, | |
| "loss": 0.2503, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14355707168579102, | |
| "step": 135, | |
| "valid_targets_mean": 1577.8, | |
| "valid_targets_min": 753 | |
| }, | |
| { | |
| "epoch": 0.687960687960688, | |
| "grad_norm": 0.596658705056403, | |
| "learning_rate": 9.842530468113578e-05, | |
| "loss": 0.2456, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.113030344247818, | |
| "step": 140, | |
| "valid_targets_mean": 1120.2, | |
| "valid_targets_min": 631 | |
| }, | |
| { | |
| "epoch": 0.7125307125307125, | |
| "grad_norm": 0.44263538751073955, | |
| "learning_rate": 9.830254801303702e-05, | |
| "loss": 0.2416, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11774349212646484, | |
| "step": 145, | |
| "valid_targets_mean": 1205.3, | |
| "valid_targets_min": 833 | |
| }, | |
| { | |
| "epoch": 0.7371007371007371, | |
| "grad_norm": 0.40787430941229114, | |
| "learning_rate": 9.817526685772452e-05, | |
| "loss": 0.2437, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12346543371677399, | |
| "step": 150, | |
| "valid_targets_mean": 1249.8, | |
| "valid_targets_min": 656 | |
| }, | |
| { | |
| "epoch": 0.7616707616707616, | |
| "grad_norm": 0.4196828968071708, | |
| "learning_rate": 9.804347313759126e-05, | |
| "loss": 0.239, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11186127364635468, | |
| "step": 155, | |
| "valid_targets_mean": 1366.1, | |
| "valid_targets_min": 828 | |
| }, | |
| { | |
| "epoch": 0.7862407862407862, | |
| "grad_norm": 0.38233087493681384, | |
| "learning_rate": 9.790717919772102e-05, | |
| "loss": 0.23, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11590471118688583, | |
| "step": 160, | |
| "valid_targets_mean": 1399.3, | |
| "valid_targets_min": 848 | |
| }, | |
| { | |
| "epoch": 0.8108108108108109, | |
| "grad_norm": 0.3840556079978897, | |
| "learning_rate": 9.776639780473198e-05, | |
| "loss": 0.2354, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11734741926193237, | |
| "step": 165, | |
| "valid_targets_mean": 1268.5, | |
| "valid_targets_min": 559 | |
| }, | |
| { | |
| "epoch": 0.8353808353808354, | |
| "grad_norm": 0.4109296293583164, | |
| "learning_rate": 9.762114214558092e-05, | |
| "loss": 0.2412, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11579617857933044, | |
| "step": 170, | |
| "valid_targets_mean": 1429.6, | |
| "valid_targets_min": 613 | |
| }, | |
| { | |
| "epoch": 0.85995085995086, | |
| "grad_norm": 0.37286715566379114, | |
| "learning_rate": 9.747142582632795e-05, | |
| "loss": 0.2466, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11519800126552582, | |
| "step": 175, | |
| "valid_targets_mean": 1256.2, | |
| "valid_targets_min": 592 | |
| }, | |
| { | |
| "epoch": 0.8845208845208845, | |
| "grad_norm": 0.3611949235146365, | |
| "learning_rate": 9.731726287086211e-05, | |
| "loss": 0.2439, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11991137266159058, | |
| "step": 180, | |
| "valid_targets_mean": 1582.8, | |
| "valid_targets_min": 696 | |
| }, | |
| { | |
| "epoch": 0.9090909090909091, | |
| "grad_norm": 0.39325905281989476, | |
| "learning_rate": 9.715866771958766e-05, | |
| "loss": 0.2381, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11788970232009888, | |
| "step": 185, | |
| "valid_targets_mean": 1309.1, | |
| "valid_targets_min": 601 | |
| }, | |
| { | |
| "epoch": 0.9336609336609336, | |
| "grad_norm": 0.4568668063950179, | |
| "learning_rate": 9.699565522807151e-05, | |
| "loss": 0.2418, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1191166564822197, | |
| "step": 190, | |
| "valid_targets_mean": 1381.5, | |
| "valid_targets_min": 855 | |
| }, | |
| { | |
| "epoch": 0.9582309582309583, | |
| "grad_norm": 0.3969433794676836, | |
| "learning_rate": 9.682824066565168e-05, | |
| "loss": 0.2312, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1245565116405487, | |
| "step": 195, | |
| "valid_targets_mean": 1291.5, | |
| "valid_targets_min": 639 | |
| }, | |
| { | |
| "epoch": 0.9828009828009828, | |
| "grad_norm": 0.40452504567463415, | |
| "learning_rate": 9.665643971400709e-05, | |
| "loss": 0.2278, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12400303781032562, | |
| "step": 200, | |
| "valid_targets_mean": 1270.6, | |
| "valid_targets_min": 668 | |
| }, | |
| { | |
| "epoch": 1.0049140049140048, | |
| "grad_norm": 0.4019161541536171, | |
| "learning_rate": 9.648026846568853e-05, | |
| "loss": 0.2301, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10288889706134796, | |
| "step": 205, | |
| "valid_targets_mean": 1385.6, | |
| "valid_targets_min": 673 | |
| }, | |
| { | |
| "epoch": 1.0294840294840295, | |
| "grad_norm": 0.4713964035318676, | |
| "learning_rate": 9.629974342261142e-05, | |
| "loss": 0.188, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08235287666320801, | |
| "step": 210, | |
| "valid_targets_mean": 1291.2, | |
| "valid_targets_min": 658 | |
| }, | |
| { | |
| "epoch": 1.054054054054054, | |
| "grad_norm": 0.3728121128850294, | |
| "learning_rate": 9.611488149450995e-05, | |
| "loss": 0.179, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07877619564533234, | |
| "step": 215, | |
| "valid_targets_mean": 1312.9, | |
| "valid_targets_min": 717 | |
| }, | |
| { | |
| "epoch": 1.0786240786240786, | |
| "grad_norm": 0.39178811104454536, | |
| "learning_rate": 9.592569999735325e-05, | |
| "loss": 0.1728, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09154706448316574, | |
| "step": 220, | |
| "valid_targets_mean": 1480.9, | |
| "valid_targets_min": 746 | |
| }, | |
| { | |
| "epoch": 1.1031941031941033, | |
| "grad_norm": 0.3386346510433248, | |
| "learning_rate": 9.57322166517234e-05, | |
| "loss": 0.1787, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09614543616771698, | |
| "step": 225, | |
| "valid_targets_mean": 1499.4, | |
| "valid_targets_min": 873 | |
| }, | |
| { | |
| "epoch": 1.1277641277641277, | |
| "grad_norm": 0.39955268162367374, | |
| "learning_rate": 9.553444958115545e-05, | |
| "loss": 0.1794, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0914674922823906, | |
| "step": 230, | |
| "valid_targets_mean": 1553.2, | |
| "valid_targets_min": 975 | |
| }, | |
| { | |
| "epoch": 1.1523341523341524, | |
| "grad_norm": 0.37356699705679897, | |
| "learning_rate": 9.53324173104399e-05, | |
| "loss": 0.1758, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09144826233386993, | |
| "step": 235, | |
| "valid_targets_mean": 1287.4, | |
| "valid_targets_min": 682 | |
| }, | |
| { | |
| "epoch": 1.1769041769041768, | |
| "grad_norm": 0.3786464677111562, | |
| "learning_rate": 9.512613876388742e-05, | |
| "loss": 0.1767, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0818377286195755, | |
| "step": 240, | |
| "valid_targets_mean": 1352.4, | |
| "valid_targets_min": 800 | |
| }, | |
| { | |
| "epoch": 1.2014742014742015, | |
| "grad_norm": 0.36472496386857967, | |
| "learning_rate": 9.491563326355628e-05, | |
| "loss": 0.1798, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08443672955036163, | |
| "step": 245, | |
| "valid_targets_mean": 1144.6, | |
| "valid_targets_min": 706 | |
| }, | |
| { | |
| "epoch": 1.2260442260442261, | |
| "grad_norm": 0.4060906953722515, | |
| "learning_rate": 9.47009205274424e-05, | |
| "loss": 0.1826, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0913555771112442, | |
| "step": 250, | |
| "valid_targets_mean": 1341.1, | |
| "valid_targets_min": 729 | |
| }, | |
| { | |
| "epoch": 1.2506142506142506, | |
| "grad_norm": 0.38184644361730635, | |
| "learning_rate": 9.448202066763237e-05, | |
| "loss": 0.1814, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08872812986373901, | |
| "step": 255, | |
| "valid_targets_mean": 1251.3, | |
| "valid_targets_min": 761 | |
| }, | |
| { | |
| "epoch": 1.2751842751842752, | |
| "grad_norm": 0.35294094367142664, | |
| "learning_rate": 9.425895418841961e-05, | |
| "loss": 0.1799, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07888481020927429, | |
| "step": 260, | |
| "valid_targets_mean": 1164.6, | |
| "valid_targets_min": 663 | |
| }, | |
| { | |
| "epoch": 1.2997542997542997, | |
| "grad_norm": 0.36613096681483226, | |
| "learning_rate": 9.403174198438372e-05, | |
| "loss": 0.1765, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08659601211547852, | |
| "step": 265, | |
| "valid_targets_mean": 1139.0, | |
| "valid_targets_min": 614 | |
| }, | |
| { | |
| "epoch": 1.3243243243243243, | |
| "grad_norm": 0.38382168587316545, | |
| "learning_rate": 9.380040533843319e-05, | |
| "loss": 0.1791, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09439398348331451, | |
| "step": 270, | |
| "valid_targets_mean": 1348.4, | |
| "valid_targets_min": 854 | |
| }, | |
| { | |
| "epoch": 1.348894348894349, | |
| "grad_norm": 0.3642425830054166, | |
| "learning_rate": 9.356496591981204e-05, | |
| "loss": 0.1775, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08763645589351654, | |
| "step": 275, | |
| "valid_targets_mean": 1346.3, | |
| "valid_targets_min": 650 | |
| }, | |
| { | |
| "epoch": 1.3734643734643734, | |
| "grad_norm": 0.3832204309855803, | |
| "learning_rate": 9.332544578206985e-05, | |
| "loss": 0.1771, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08698828518390656, | |
| "step": 280, | |
| "valid_targets_mean": 1353.8, | |
| "valid_targets_min": 623 | |
| }, | |
| { | |
| "epoch": 1.398034398034398, | |
| "grad_norm": 0.3561629333331951, | |
| "learning_rate": 9.308186736099614e-05, | |
| "loss": 0.1838, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09670018404722214, | |
| "step": 285, | |
| "valid_targets_mean": 1320.3, | |
| "valid_targets_min": 781 | |
| }, | |
| { | |
| "epoch": 1.4226044226044225, | |
| "grad_norm": 0.3765510806090441, | |
| "learning_rate": 9.28342534725188e-05, | |
| "loss": 0.1823, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08860672265291214, | |
| "step": 290, | |
| "valid_targets_mean": 1303.7, | |
| "valid_targets_min": 582 | |
| }, | |
| { | |
| "epoch": 1.4471744471744472, | |
| "grad_norm": 0.33661939310832245, | |
| "learning_rate": 9.258262731056688e-05, | |
| "loss": 0.1769, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08029186725616455, | |
| "step": 295, | |
| "valid_targets_mean": 1230.8, | |
| "valid_targets_min": 688 | |
| }, | |
| { | |
| "epoch": 1.4717444717444716, | |
| "grad_norm": 0.3643823226415973, | |
| "learning_rate": 9.23270124448981e-05, | |
| "loss": 0.1814, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0855192244052887, | |
| "step": 300, | |
| "valid_targets_mean": 1241.6, | |
| "valid_targets_min": 684 | |
| }, | |
| { | |
| "epoch": 1.4963144963144963, | |
| "grad_norm": 0.36001426651444374, | |
| "learning_rate": 9.206743281889097e-05, | |
| "loss": 0.1741, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08223570883274078, | |
| "step": 305, | |
| "valid_targets_mean": 1359.8, | |
| "valid_targets_min": 633 | |
| }, | |
| { | |
| "epoch": 1.520884520884521, | |
| "grad_norm": 0.3622448158704566, | |
| "learning_rate": 9.18039127473021e-05, | |
| "loss": 0.1825, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08101462572813034, | |
| "step": 310, | |
| "valid_targets_mean": 1251.8, | |
| "valid_targets_min": 502 | |
| }, | |
| { | |
| "epoch": 1.5454545454545454, | |
| "grad_norm": 0.3673440986818992, | |
| "learning_rate": 9.153647691398866e-05, | |
| "loss": 0.1824, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08977461606264114, | |
| "step": 315, | |
| "valid_targets_mean": 1314.8, | |
| "valid_targets_min": 650 | |
| }, | |
| { | |
| "epoch": 1.57002457002457, | |
| "grad_norm": 0.3712553762342498, | |
| "learning_rate": 9.126515036959613e-05, | |
| "loss": 0.1834, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0836411640048027, | |
| "step": 320, | |
| "valid_targets_mean": 1373.9, | |
| "valid_targets_min": 741 | |
| }, | |
| { | |
| "epoch": 1.5945945945945947, | |
| "grad_norm": 0.3641331948462104, | |
| "learning_rate": 9.098995852921197e-05, | |
| "loss": 0.1788, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08982405811548233, | |
| "step": 325, | |
| "valid_targets_mean": 1291.5, | |
| "valid_targets_min": 654 | |
| }, | |
| { | |
| "epoch": 1.6191646191646192, | |
| "grad_norm": 0.34273897831427635, | |
| "learning_rate": 9.07109271699849e-05, | |
| "loss": 0.1883, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07989470660686493, | |
| "step": 330, | |
| "valid_targets_mean": 1270.9, | |
| "valid_targets_min": 590 | |
| }, | |
| { | |
| "epoch": 1.6437346437346436, | |
| "grad_norm": 0.3849520242237801, | |
| "learning_rate": 9.042808242871035e-05, | |
| "loss": 0.1865, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09401971101760864, | |
| "step": 335, | |
| "valid_targets_mean": 1381.6, | |
| "valid_targets_min": 765 | |
| }, | |
| { | |
| "epoch": 1.6683046683046683, | |
| "grad_norm": 0.36040402759215207, | |
| "learning_rate": 9.014145079938228e-05, | |
| "loss": 0.1838, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08834733068943024, | |
| "step": 340, | |
| "valid_targets_mean": 1301.9, | |
| "valid_targets_min": 724 | |
| }, | |
| { | |
| "epoch": 1.692874692874693, | |
| "grad_norm": 0.35689718091019035, | |
| "learning_rate": 8.985105913071148e-05, | |
| "loss": 0.1787, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10028181225061417, | |
| "step": 345, | |
| "valid_targets_mean": 1398.2, | |
| "valid_targets_min": 725 | |
| }, | |
| { | |
| "epoch": 1.7174447174447174, | |
| "grad_norm": 0.3615825498096839, | |
| "learning_rate": 8.955693462361065e-05, | |
| "loss": 0.179, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09255008399486542, | |
| "step": 350, | |
| "valid_targets_mean": 1295.4, | |
| "valid_targets_min": 803 | |
| }, | |
| { | |
| "epoch": 1.742014742014742, | |
| "grad_norm": 0.35872374357460324, | |
| "learning_rate": 8.925910482864652e-05, | |
| "loss": 0.1877, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0775061845779419, | |
| "step": 355, | |
| "valid_targets_mean": 1276.3, | |
| "valid_targets_min": 631 | |
| }, | |
| { | |
| "epoch": 1.7665847665847667, | |
| "grad_norm": 0.35482753893532454, | |
| "learning_rate": 8.895759764345914e-05, | |
| "loss": 0.1828, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09774541854858398, | |
| "step": 360, | |
| "valid_targets_mean": 1289.6, | |
| "valid_targets_min": 622 | |
| }, | |
| { | |
| "epoch": 1.7911547911547911, | |
| "grad_norm": 0.3785565441243277, | |
| "learning_rate": 8.865244131014883e-05, | |
| "loss": 0.182, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0870940238237381, | |
| "step": 365, | |
| "valid_targets_mean": 1332.1, | |
| "valid_targets_min": 626 | |
| }, | |
| { | |
| "epoch": 1.8157248157248156, | |
| "grad_norm": 0.3552269775568485, | |
| "learning_rate": 8.834366441263056e-05, | |
| "loss": 0.1781, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08362573385238647, | |
| "step": 370, | |
| "valid_targets_mean": 1125.2, | |
| "valid_targets_min": 521 | |
| }, | |
| { | |
| "epoch": 1.8402948402948403, | |
| "grad_norm": 0.37478075988467835, | |
| "learning_rate": 8.803129587395673e-05, | |
| "loss": 0.181, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08486934006214142, | |
| "step": 375, | |
| "valid_targets_mean": 1216.6, | |
| "valid_targets_min": 486 | |
| }, | |
| { | |
| "epoch": 1.864864864864865, | |
| "grad_norm": 0.3769227355201233, | |
| "learning_rate": 8.771536495360776e-05, | |
| "loss": 0.1777, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09369044005870819, | |
| "step": 380, | |
| "valid_targets_mean": 1202.1, | |
| "valid_targets_min": 671 | |
| }, | |
| { | |
| "epoch": 1.8894348894348894, | |
| "grad_norm": 0.3428843010485239, | |
| "learning_rate": 8.739590124475148e-05, | |
| "loss": 0.1808, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08531995117664337, | |
| "step": 385, | |
| "valid_targets_mean": 1225.2, | |
| "valid_targets_min": 589 | |
| }, | |
| { | |
| "epoch": 1.914004914004914, | |
| "grad_norm": 0.3502029019390788, | |
| "learning_rate": 8.707293467147109e-05, | |
| "loss": 0.1809, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09207258373498917, | |
| "step": 390, | |
| "valid_targets_mean": 1262.0, | |
| "valid_targets_min": 909 | |
| }, | |
| { | |
| "epoch": 1.9385749385749387, | |
| "grad_norm": 0.33525127668618215, | |
| "learning_rate": 8.674649548596221e-05, | |
| "loss": 0.1778, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08499929308891296, | |
| "step": 395, | |
| "valid_targets_mean": 1225.2, | |
| "valid_targets_min": 527 | |
| }, | |
| { | |
| "epoch": 1.9631449631449631, | |
| "grad_norm": 0.3278244741049693, | |
| "learning_rate": 8.641661426569916e-05, | |
| "loss": 0.1777, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.080820232629776, | |
| "step": 400, | |
| "valid_targets_mean": 1319.4, | |
| "valid_targets_min": 618 | |
| }, | |
| { | |
| "epoch": 1.9877149877149876, | |
| "grad_norm": 0.3479090084518426, | |
| "learning_rate": 8.608332191057076e-05, | |
| "loss": 0.1758, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08339822292327881, | |
| "step": 405, | |
| "valid_targets_mean": 1284.8, | |
| "valid_targets_min": 820 | |
| }, | |
| { | |
| "epoch": 2.0098280098280097, | |
| "grad_norm": 0.33772815977939674, | |
| "learning_rate": 8.57466496399859e-05, | |
| "loss": 0.1508, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05549818277359009, | |
| "step": 410, | |
| "valid_targets_mean": 1236.8, | |
| "valid_targets_min": 698 | |
| }, | |
| { | |
| "epoch": 2.0343980343980346, | |
| "grad_norm": 0.35332743790390486, | |
| "learning_rate": 8.54066289899494e-05, | |
| "loss": 0.12, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05471636354923248, | |
| "step": 415, | |
| "valid_targets_mean": 1206.4, | |
| "valid_targets_min": 796 | |
| }, | |
| { | |
| "epoch": 2.058968058968059, | |
| "grad_norm": 0.34889197536174715, | |
| "learning_rate": 8.506329181010781e-05, | |
| "loss": 0.1139, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.052125804126262665, | |
| "step": 420, | |
| "valid_targets_mean": 1230.4, | |
| "valid_targets_min": 486 | |
| }, | |
| { | |
| "epoch": 2.0835380835380835, | |
| "grad_norm": 0.342810837053402, | |
| "learning_rate": 8.471667026076621e-05, | |
| "loss": 0.1151, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05554091930389404, | |
| "step": 425, | |
| "valid_targets_mean": 1376.2, | |
| "valid_targets_min": 683 | |
| }, | |
| { | |
| "epoch": 2.108108108108108, | |
| "grad_norm": 0.3599219497453591, | |
| "learning_rate": 8.436679680987571e-05, | |
| "loss": 0.1148, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.056023404002189636, | |
| "step": 430, | |
| "valid_targets_mean": 1327.0, | |
| "valid_targets_min": 755 | |
| }, | |
| { | |
| "epoch": 2.1326781326781328, | |
| "grad_norm": 0.37719592865909146, | |
| "learning_rate": 8.401370422999224e-05, | |
| "loss": 0.1172, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06519174575805664, | |
| "step": 435, | |
| "valid_targets_mean": 1529.8, | |
| "valid_targets_min": 863 | |
| }, | |
| { | |
| "epoch": 2.157248157248157, | |
| "grad_norm": 0.3457538844180661, | |
| "learning_rate": 8.365742559520669e-05, | |
| "loss": 0.1169, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0638711005449295, | |
| "step": 440, | |
| "valid_targets_mean": 1522.7, | |
| "valid_targets_min": 881 | |
| }, | |
| { | |
| "epoch": 2.1818181818181817, | |
| "grad_norm": 0.3756160940209798, | |
| "learning_rate": 8.329799427804683e-05, | |
| "loss": 0.1173, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05762581154704094, | |
| "step": 445, | |
| "valid_targets_mean": 1156.8, | |
| "valid_targets_min": 521 | |
| }, | |
| { | |
| "epoch": 2.2063882063882065, | |
| "grad_norm": 0.35582823840706373, | |
| "learning_rate": 8.293544394635149e-05, | |
| "loss": 0.1159, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0591716468334198, | |
| "step": 450, | |
| "valid_targets_mean": 1395.5, | |
| "valid_targets_min": 575 | |
| }, | |
| { | |
| "epoch": 2.230958230958231, | |
| "grad_norm": 0.3474647468975514, | |
| "learning_rate": 8.256980856011672e-05, | |
| "loss": 0.1164, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05874478816986084, | |
| "step": 455, | |
| "valid_targets_mean": 1361.8, | |
| "valid_targets_min": 754 | |
| }, | |
| { | |
| "epoch": 2.2555282555282554, | |
| "grad_norm": 0.33681306262455324, | |
| "learning_rate": 8.22011223683148e-05, | |
| "loss": 0.114, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05156775563955307, | |
| "step": 460, | |
| "valid_targets_mean": 1222.9, | |
| "valid_targets_min": 658 | |
| }, | |
| { | |
| "epoch": 2.2800982800982803, | |
| "grad_norm": 0.3470018928684881, | |
| "learning_rate": 8.182941990568626e-05, | |
| "loss": 0.1144, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05891558527946472, | |
| "step": 465, | |
| "valid_targets_mean": 1401.7, | |
| "valid_targets_min": 681 | |
| }, | |
| { | |
| "epoch": 2.3046683046683047, | |
| "grad_norm": 0.3383126654609071, | |
| "learning_rate": 8.145473598950489e-05, | |
| "loss": 0.12, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05846539884805679, | |
| "step": 470, | |
| "valid_targets_mean": 1435.5, | |
| "valid_targets_min": 686 | |
| }, | |
| { | |
| "epoch": 2.329238329238329, | |
| "grad_norm": 0.3743955050765521, | |
| "learning_rate": 8.107710571631648e-05, | |
| "loss": 0.1213, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0630626231431961, | |
| "step": 475, | |
| "valid_targets_mean": 1401.7, | |
| "valid_targets_min": 761 | |
| }, | |
| { | |
| "epoch": 2.3538083538083536, | |
| "grad_norm": 0.3479810802972055, | |
| "learning_rate": 8.06965644586513e-05, | |
| "loss": 0.1186, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05939309298992157, | |
| "step": 480, | |
| "valid_targets_mean": 1252.8, | |
| "valid_targets_min": 600 | |
| }, | |
| { | |
| "epoch": 2.3783783783783785, | |
| "grad_norm": 0.34502770257222604, | |
| "learning_rate": 8.031314786171083e-05, | |
| "loss": 0.1205, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05798602104187012, | |
| "step": 485, | |
| "valid_targets_mean": 1292.5, | |
| "valid_targets_min": 599 | |
| }, | |
| { | |
| "epoch": 2.402948402948403, | |
| "grad_norm": 0.361312416737643, | |
| "learning_rate": 7.99268918400288e-05, | |
| "loss": 0.1183, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05880609527230263, | |
| "step": 490, | |
| "valid_targets_mean": 1293.4, | |
| "valid_targets_min": 643 | |
| }, | |
| { | |
| "epoch": 2.4275184275184274, | |
| "grad_norm": 0.36520874389005514, | |
| "learning_rate": 7.953783257410713e-05, | |
| "loss": 0.1178, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06413908302783966, | |
| "step": 495, | |
| "valid_targets_mean": 1497.6, | |
| "valid_targets_min": 739 | |
| }, | |
| { | |
| "epoch": 2.4520884520884523, | |
| "grad_norm": 0.33310913231009204, | |
| "learning_rate": 7.914600650702691e-05, | |
| "loss": 0.1209, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0641617551445961, | |
| "step": 500, | |
| "valid_targets_mean": 1445.1, | |
| "valid_targets_min": 775 | |
| }, | |
| { | |
| "epoch": 2.4766584766584767, | |
| "grad_norm": 0.37018880178023517, | |
| "learning_rate": 7.875145034103479e-05, | |
| "loss": 0.1187, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05668133497238159, | |
| "step": 505, | |
| "valid_targets_mean": 1145.2, | |
| "valid_targets_min": 652 | |
| }, | |
| { | |
| "epoch": 2.501228501228501, | |
| "grad_norm": 0.3699366956357319, | |
| "learning_rate": 7.835420103410504e-05, | |
| "loss": 0.12, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05439180135726929, | |
| "step": 510, | |
| "valid_targets_mean": 1140.4, | |
| "valid_targets_min": 690 | |
| }, | |
| { | |
| "epoch": 2.5257985257985256, | |
| "grad_norm": 0.3493711971707187, | |
| "learning_rate": 7.795429579647781e-05, | |
| "loss": 0.1198, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05129605531692505, | |
| "step": 515, | |
| "valid_targets_mean": 1130.1, | |
| "valid_targets_min": 510 | |
| }, | |
| { | |
| "epoch": 2.5503685503685505, | |
| "grad_norm": 0.34249507252623157, | |
| "learning_rate": 7.755177208717356e-05, | |
| "loss": 0.1209, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06780637800693512, | |
| "step": 520, | |
| "valid_targets_mean": 1362.8, | |
| "valid_targets_min": 785 | |
| }, | |
| { | |
| "epoch": 2.574938574938575, | |
| "grad_norm": 0.3570510910221036, | |
| "learning_rate": 7.71466676104843e-05, | |
| "loss": 0.1183, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.050399065017700195, | |
| "step": 525, | |
| "valid_targets_mean": 1082.3, | |
| "valid_targets_min": 501 | |
| }, | |
| { | |
| "epoch": 2.5995085995085994, | |
| "grad_norm": 0.3364722657405281, | |
| "learning_rate": 7.673902031244189e-05, | |
| "loss": 0.1203, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.062017861753702164, | |
| "step": 530, | |
| "valid_targets_mean": 1341.2, | |
| "valid_targets_min": 662 | |
| }, | |
| { | |
| "epoch": 2.6240786240786242, | |
| "grad_norm": 0.36931412486889464, | |
| "learning_rate": 7.632886837726359e-05, | |
| "loss": 0.1205, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06189979240298271, | |
| "step": 535, | |
| "valid_targets_mean": 1222.2, | |
| "valid_targets_min": 679 | |
| }, | |
| { | |
| "epoch": 2.6486486486486487, | |
| "grad_norm": 0.338457123494396, | |
| "learning_rate": 7.591625022377537e-05, | |
| "loss": 0.1206, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05179202929139137, | |
| "step": 540, | |
| "valid_targets_mean": 1184.9, | |
| "valid_targets_min": 492 | |
| }, | |
| { | |
| "epoch": 2.673218673218673, | |
| "grad_norm": 0.3244686185474909, | |
| "learning_rate": 7.550120450181324e-05, | |
| "loss": 0.1184, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06525638699531555, | |
| "step": 545, | |
| "valid_targets_mean": 1495.0, | |
| "valid_targets_min": 754 | |
| }, | |
| { | |
| "epoch": 2.697788697788698, | |
| "grad_norm": 0.335778159582729, | |
| "learning_rate": 7.508377008860294e-05, | |
| "loss": 0.1187, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.048954833298921585, | |
| "step": 550, | |
| "valid_targets_mean": 1146.8, | |
| "valid_targets_min": 629 | |
| }, | |
| { | |
| "epoch": 2.7223587223587224, | |
| "grad_norm": 0.3423155775776973, | |
| "learning_rate": 7.466398608511826e-05, | |
| "loss": 0.122, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0645413026213646, | |
| "step": 555, | |
| "valid_targets_mean": 1507.3, | |
| "valid_targets_min": 708 | |
| }, | |
| { | |
| "epoch": 2.746928746928747, | |
| "grad_norm": 0.3486117813784432, | |
| "learning_rate": 7.424189181241856e-05, | |
| "loss": 0.1192, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05744887888431549, | |
| "step": 560, | |
| "valid_targets_mean": 1291.0, | |
| "valid_targets_min": 654 | |
| }, | |
| { | |
| "epoch": 2.7714987714987718, | |
| "grad_norm": 0.34392630396696766, | |
| "learning_rate": 7.381752680796547e-05, | |
| "loss": 0.1174, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05362129211425781, | |
| "step": 565, | |
| "valid_targets_mean": 1325.0, | |
| "valid_targets_min": 679 | |
| }, | |
| { | |
| "epoch": 2.796068796068796, | |
| "grad_norm": 0.33308716912129516, | |
| "learning_rate": 7.339093082191953e-05, | |
| "loss": 0.1163, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05184609442949295, | |
| "step": 570, | |
| "valid_targets_mean": 1360.9, | |
| "valid_targets_min": 585 | |
| }, | |
| { | |
| "epoch": 2.8206388206388207, | |
| "grad_norm": 0.3587925120059478, | |
| "learning_rate": 7.29621438134167e-05, | |
| "loss": 0.1229, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0640011578798294, | |
| "step": 575, | |
| "valid_targets_mean": 1299.4, | |
| "valid_targets_min": 735 | |
| }, | |
| { | |
| "epoch": 2.845208845208845, | |
| "grad_norm": 0.3411894303363057, | |
| "learning_rate": 7.253120594682547e-05, | |
| "loss": 0.1235, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.058491215109825134, | |
| "step": 580, | |
| "valid_targets_mean": 1390.1, | |
| "valid_targets_min": 816 | |
| }, | |
| { | |
| "epoch": 2.8697788697788695, | |
| "grad_norm": 0.32947595924960127, | |
| "learning_rate": 7.209815758798464e-05, | |
| "loss": 0.1219, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.057379547506570816, | |
| "step": 585, | |
| "valid_targets_mean": 1288.5, | |
| "valid_targets_min": 631 | |
| }, | |
| { | |
| "epoch": 2.8943488943488944, | |
| "grad_norm": 0.4917965558943038, | |
| "learning_rate": 7.166303930042233e-05, | |
| "loss": 0.1191, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0557168647646904, | |
| "step": 590, | |
| "valid_targets_mean": 1338.1, | |
| "valid_targets_min": 641 | |
| }, | |
| { | |
| "epoch": 2.918918918918919, | |
| "grad_norm": 0.34569685035267406, | |
| "learning_rate": 7.122589184155626e-05, | |
| "loss": 0.121, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06044552102684975, | |
| "step": 595, | |
| "valid_targets_mean": 1258.4, | |
| "valid_targets_min": 670 | |
| }, | |
| { | |
| "epoch": 2.9434889434889433, | |
| "grad_norm": 0.314176762801982, | |
| "learning_rate": 7.078675615887618e-05, | |
| "loss": 0.1211, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05718706548213959, | |
| "step": 600, | |
| "valid_targets_mean": 1445.6, | |
| "valid_targets_min": 538 | |
| }, | |
| { | |
| "epoch": 2.968058968058968, | |
| "grad_norm": 0.3242469563892565, | |
| "learning_rate": 7.034567338610819e-05, | |
| "loss": 0.1215, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06585130095481873, | |
| "step": 605, | |
| "valid_targets_mean": 1457.1, | |
| "valid_targets_min": 772 | |
| }, | |
| { | |
| "epoch": 2.9926289926289926, | |
| "grad_norm": 0.34892426388331144, | |
| "learning_rate": 6.990268483936189e-05, | |
| "loss": 0.1215, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06495223939418793, | |
| "step": 610, | |
| "valid_targets_mean": 1545.8, | |
| "valid_targets_min": 729 | |
| }, | |
| { | |
| "epoch": 3.0147420147420148, | |
| "grad_norm": 0.2977325071801822, | |
| "learning_rate": 6.945783201326015e-05, | |
| "loss": 0.0882, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.037302177399396896, | |
| "step": 615, | |
| "valid_targets_mean": 1406.4, | |
| "valid_targets_min": 681 | |
| }, | |
| { | |
| "epoch": 3.039312039312039, | |
| "grad_norm": 0.3861565704828333, | |
| "learning_rate": 6.901115657705246e-05, | |
| "loss": 0.0709, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.041658252477645874, | |
| "step": 620, | |
| "valid_targets_mean": 1367.2, | |
| "valid_targets_min": 673 | |
| }, | |
| { | |
| "epoch": 3.063882063882064, | |
| "grad_norm": 0.33429050333383475, | |
| "learning_rate": 6.856270037071176e-05, | |
| "loss": 0.0714, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.036533765494823456, | |
| "step": 625, | |
| "valid_targets_mean": 1446.1, | |
| "valid_targets_min": 745 | |
| }, | |
| { | |
| "epoch": 3.0884520884520885, | |
| "grad_norm": 0.406298735051371, | |
| "learning_rate": 6.811250540101517e-05, | |
| "loss": 0.0691, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.03498723730444908, | |
| "step": 630, | |
| "valid_targets_mean": 1332.6, | |
| "valid_targets_min": 880 | |
| }, | |
| { | |
| "epoch": 3.113022113022113, | |
| "grad_norm": 0.30666504482556517, | |
| "learning_rate": 6.766061383760943e-05, | |
| "loss": 0.0691, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.030431220307946205, | |
| "step": 635, | |
| "valid_targets_mean": 1329.4, | |
| "valid_targets_min": 847 | |
| }, | |
| { | |
| "epoch": 3.1375921375921374, | |
| "grad_norm": 0.3347413979325829, | |
| "learning_rate": 6.72070680090607e-05, | |
| "loss": 0.0694, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.037094853818416595, | |
| "step": 640, | |
| "valid_targets_mean": 1259.8, | |
| "valid_targets_min": 690 | |
| }, | |
| { | |
| "epoch": 3.1621621621621623, | |
| "grad_norm": 0.35070823982092636, | |
| "learning_rate": 6.675191039888978e-05, | |
| "loss": 0.0735, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.03559793531894684, | |
| "step": 645, | |
| "valid_targets_mean": 1238.9, | |
| "valid_targets_min": 683 | |
| }, | |
| { | |
| "epoch": 3.1867321867321867, | |
| "grad_norm": 0.3480670160346414, | |
| "learning_rate": 6.629518364159259e-05, | |
| "loss": 0.0731, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.034793753176927567, | |
| "step": 650, | |
| "valid_targets_mean": 1273.3, | |
| "valid_targets_min": 740 | |
| }, | |
| { | |
| "epoch": 3.211302211302211, | |
| "grad_norm": 0.3288719326795959, | |
| "learning_rate": 6.583693051864668e-05, | |
| "loss": 0.0743, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0357627272605896, | |
| "step": 655, | |
| "valid_targets_mean": 1355.0, | |
| "valid_targets_min": 673 | |
| }, | |
| { | |
| "epoch": 3.235872235872236, | |
| "grad_norm": 0.3325184929663535, | |
| "learning_rate": 6.537719395450391e-05, | |
| "loss": 0.0735, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.035944681614637375, | |
| "step": 660, | |
| "valid_targets_mean": 1264.7, | |
| "valid_targets_min": 586 | |
| }, | |
| { | |
| "epoch": 3.2604422604422605, | |
| "grad_norm": 0.35118501191130574, | |
| "learning_rate": 6.491601701256966e-05, | |
| "loss": 0.0725, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0333075150847435, | |
| "step": 665, | |
| "valid_targets_mean": 1220.9, | |
| "valid_targets_min": 632 | |
| }, | |
| { | |
| "epoch": 3.285012285012285, | |
| "grad_norm": 0.3516843877270984, | |
| "learning_rate": 6.44534428911691e-05, | |
| "loss": 0.0714, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.037736207246780396, | |
| "step": 670, | |
| "valid_targets_mean": 1361.2, | |
| "valid_targets_min": 521 | |
| }, | |
| { | |
| "epoch": 3.30958230958231, | |
| "grad_norm": 0.32212096835158077, | |
| "learning_rate": 6.398951491950089e-05, | |
| "loss": 0.0747, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.03314409777522087, | |
| "step": 675, | |
| "valid_targets_mean": 1343.1, | |
| "valid_targets_min": 787 | |
| }, | |
| { | |
| "epoch": 3.3341523341523343, | |
| "grad_norm": 0.36619223426782116, | |
| "learning_rate": 6.352427655357848e-05, | |
| "loss": 0.0729, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.03750663995742798, | |
| "step": 680, | |
| "valid_targets_mean": 1133.4, | |
| "valid_targets_min": 692 | |
| }, | |
| { | |
| "epoch": 3.3587223587223587, | |
| "grad_norm": 0.3712584851066574, | |
| "learning_rate": 6.30577713721596e-05, | |
| "loss": 0.0754, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0373421236872673, | |
| "step": 685, | |
| "valid_targets_mean": 1293.4, | |
| "valid_targets_min": 723 | |
| }, | |
| { | |
| "epoch": 3.383292383292383, | |
| "grad_norm": 0.3413543433956999, | |
| "learning_rate": 6.259004307266426e-05, | |
| "loss": 0.0744, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.03667682409286499, | |
| "step": 690, | |
| "valid_targets_mean": 1444.6, | |
| "valid_targets_min": 988 | |
| }, | |
| { | |
| "epoch": 3.407862407862408, | |
| "grad_norm": 0.34966421597251157, | |
| "learning_rate": 6.212113546708165e-05, | |
| "loss": 0.0733, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.03585509583353996, | |
| "step": 695, | |
| "valid_targets_mean": 1392.1, | |
| "valid_targets_min": 649 | |
| }, | |
| { | |
| "epoch": 3.4324324324324325, | |
| "grad_norm": 0.33436130188939295, | |
| "learning_rate": 6.165109247786624e-05, | |
| "loss": 0.0752, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.03639696165919304, | |
| "step": 700, | |
| "valid_targets_mean": 1271.6, | |
| "valid_targets_min": 613 | |
| }, | |
| { | |
| "epoch": 3.457002457002457, | |
| "grad_norm": 0.3370440922395482, | |
| "learning_rate": 6.117995813382357e-05, | |
| "loss": 0.0733, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.03243023902177811, | |
| "step": 705, | |
| "valid_targets_mean": 1190.7, | |
| "valid_targets_min": 668 | |
| }, | |
| { | |
| "epoch": 3.4815724815724813, | |
| "grad_norm": 0.34572473082483673, | |
| "learning_rate": 6.070777656598615e-05, | |
| "loss": 0.0734, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.03953058272600174, | |
| "step": 710, | |
| "valid_targets_mean": 1412.8, | |
| "valid_targets_min": 608 | |
| }, | |
| { | |
| "epoch": 3.506142506142506, | |
| "grad_norm": 0.32540553671935935, | |
| "learning_rate": 6.023459200347964e-05, | |
| "loss": 0.0738, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.03626057878136635, | |
| "step": 715, | |
| "valid_targets_mean": 1160.9, | |
| "valid_targets_min": 682 | |
| }, | |
| { | |
| "epoch": 3.5307125307125307, | |
| "grad_norm": 0.3555455251156863, | |
| "learning_rate": 5.976044876937997e-05, | |
| "loss": 0.0723, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.038836769759655, | |
| "step": 720, | |
| "valid_targets_mean": 1545.9, | |
| "valid_targets_min": 865 | |
| }, | |
| { | |
| "epoch": 3.555282555282555, | |
| "grad_norm": 0.32715758611553336, | |
| "learning_rate": 5.9285391276561565e-05, | |
| "loss": 0.0728, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.036427922546863556, | |
| "step": 725, | |
| "valid_targets_mean": 1344.4, | |
| "valid_targets_min": 838 | |
| }, | |
| { | |
| "epoch": 3.57985257985258, | |
| "grad_norm": 0.33795073934789194, | |
| "learning_rate": 5.8809464023537265e-05, | |
| "loss": 0.0742, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.038653090596199036, | |
| "step": 730, | |
| "valid_targets_mean": 1377.6, | |
| "valid_targets_min": 732 | |
| }, | |
| { | |
| "epoch": 3.6044226044226044, | |
| "grad_norm": 0.3213417450855897, | |
| "learning_rate": 5.83327115902901e-05, | |
| "loss": 0.0742, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.03317331522703171, | |
| "step": 735, | |
| "valid_targets_mean": 1256.9, | |
| "valid_targets_min": 825 | |
| }, | |
| { | |
| "epoch": 3.628992628992629, | |
| "grad_norm": 0.35052676334106075, | |
| "learning_rate": 5.785517863409752e-05, | |
| "loss": 0.0734, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.035290129482746124, | |
| "step": 740, | |
| "valid_targets_mean": 1192.2, | |
| "valid_targets_min": 457 | |
| }, | |
| { | |
| "epoch": 3.6535626535626538, | |
| "grad_norm": 0.3388956895842398, | |
| "learning_rate": 5.737690988534836e-05, | |
| "loss": 0.0729, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.03399132192134857, | |
| "step": 745, | |
| "valid_targets_mean": 1213.5, | |
| "valid_targets_min": 527 | |
| }, | |
| { | |
| "epoch": 3.678132678132678, | |
| "grad_norm": 0.3669302957387778, | |
| "learning_rate": 5.689795014335296e-05, | |
| "loss": 0.0748, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.038341738283634186, | |
| "step": 750, | |
| "valid_targets_mean": 1240.8, | |
| "valid_targets_min": 624 | |
| }, | |
| { | |
| "epoch": 3.7027027027027026, | |
| "grad_norm": 0.3459031323835537, | |
| "learning_rate": 5.6418344272146816e-05, | |
| "loss": 0.0746, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.03371729701757431, | |
| "step": 755, | |
| "valid_targets_mean": 1229.1, | |
| "valid_targets_min": 773 | |
| }, | |
| { | |
| "epoch": 3.7272727272727275, | |
| "grad_norm": 0.3246504376784696, | |
| "learning_rate": 5.593813719628819e-05, | |
| "loss": 0.0743, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.03680756688117981, | |
| "step": 760, | |
| "valid_targets_mean": 1345.2, | |
| "valid_targets_min": 631 | |
| }, | |
| { | |
| "epoch": 3.751842751842752, | |
| "grad_norm": 0.31584812697899933, | |
| "learning_rate": 5.545737389664999e-05, | |
| "loss": 0.0745, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.03372339904308319, | |
| "step": 765, | |
| "valid_targets_mean": 1635.1, | |
| "valid_targets_min": 1048 | |
| }, | |
| { | |
| "epoch": 3.7764127764127764, | |
| "grad_norm": 0.3254038680949618, | |
| "learning_rate": 5.4976099406206516e-05, | |
| "loss": 0.0714, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.03635542094707489, | |
| "step": 770, | |
| "valid_targets_mean": 1342.9, | |
| "valid_targets_min": 734 | |
| }, | |
| { | |
| "epoch": 3.800982800982801, | |
| "grad_norm": 0.3462002699846917, | |
| "learning_rate": 5.449435880581513e-05, | |
| "loss": 0.0741, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.03568622097373009, | |
| "step": 775, | |
| "valid_targets_mean": 1382.1, | |
| "valid_targets_min": 619 | |
| }, | |
| { | |
| "epoch": 3.8255528255528253, | |
| "grad_norm": 0.3267035574353153, | |
| "learning_rate": 5.401219721999364e-05, | |
| "loss": 0.0716, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.03537577763199806, | |
| "step": 780, | |
| "valid_targets_mean": 1539.4, | |
| "valid_targets_min": 614 | |
| }, | |
| { | |
| "epoch": 3.85012285012285, | |
| "grad_norm": 0.38327734503562755, | |
| "learning_rate": 5.352965981269342e-05, | |
| "loss": 0.0726, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.03703831881284714, | |
| "step": 785, | |
| "valid_targets_mean": 1188.2, | |
| "valid_targets_min": 600 | |
| }, | |
| { | |
| "epoch": 3.8746928746928746, | |
| "grad_norm": 0.334275811536087, | |
| "learning_rate": 5.304679178306894e-05, | |
| "loss": 0.0721, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.035076290369033813, | |
| "step": 790, | |
| "valid_targets_mean": 1480.4, | |
| "valid_targets_min": 475 | |
| }, | |
| { | |
| "epoch": 3.899262899262899, | |
| "grad_norm": 0.33191141500394544, | |
| "learning_rate": 5.2563638361244004e-05, | |
| "loss": 0.072, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.03381187468767166, | |
| "step": 795, | |
| "valid_targets_mean": 1222.4, | |
| "valid_targets_min": 636 | |
| }, | |
| { | |
| "epoch": 3.923832923832924, | |
| "grad_norm": 0.3401577978749103, | |
| "learning_rate": 5.2080244804075e-05, | |
| "loss": 0.0716, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.03545871376991272, | |
| "step": 800, | |
| "valid_targets_mean": 1154.8, | |
| "valid_targets_min": 704 | |
| }, | |
| { | |
| "epoch": 3.9484029484029484, | |
| "grad_norm": 0.32533812897018854, | |
| "learning_rate": 5.1596656390911756e-05, | |
| "loss": 0.0727, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.036433812230825424, | |
| "step": 805, | |
| "valid_targets_mean": 1252.9, | |
| "valid_targets_min": 758 | |
| }, | |
| { | |
| "epoch": 3.972972972972973, | |
| "grad_norm": 0.3090743671273376, | |
| "learning_rate": 5.111291841935619e-05, | |
| "loss": 0.0729, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.03902567923069, | |
| "step": 810, | |
| "valid_targets_mean": 1423.0, | |
| "valid_targets_min": 833 | |
| }, | |
| { | |
| "epoch": 3.9975429975429977, | |
| "grad_norm": 0.3823054079712023, | |
| "learning_rate": 5.0629076201019364e-05, | |
| "loss": 0.0749, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.053378112614154816, | |
| "step": 815, | |
| "valid_targets_mean": 1398.7, | |
| "valid_targets_min": 680 | |
| }, | |
| { | |
| "epoch": 4.019656019656019, | |
| "grad_norm": 0.27993141223279444, | |
| "learning_rate": 5.014517505727702e-05, | |
| "loss": 0.0453, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.018219877034425735, | |
| "step": 820, | |
| "valid_targets_mean": 1362.6, | |
| "valid_targets_min": 796 | |
| }, | |
| { | |
| "epoch": 4.044226044226044, | |
| "grad_norm": 0.3741991961810599, | |
| "learning_rate": 4.966126031502452e-05, | |
| "loss": 0.0413, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.019840583205223083, | |
| "step": 825, | |
| "valid_targets_mean": 1268.1, | |
| "valid_targets_min": 538 | |
| }, | |
| { | |
| "epoch": 4.068796068796069, | |
| "grad_norm": 0.30892475697952576, | |
| "learning_rate": 4.917737730243093e-05, | |
| "loss": 0.0384, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.02161870151758194, | |
| "step": 830, | |
| "valid_targets_mean": 1377.0, | |
| "valid_targets_min": 820 | |
| }, | |
| { | |
| "epoch": 4.093366093366093, | |
| "grad_norm": 0.3224276737619888, | |
| "learning_rate": 4.869357134469325e-05, | |
| "loss": 0.039, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.021871669217944145, | |
| "step": 835, | |
| "valid_targets_mean": 1343.4, | |
| "valid_targets_min": 711 | |
| }, | |
| { | |
| "epoch": 4.117936117936118, | |
| "grad_norm": 0.31835589619396676, | |
| "learning_rate": 4.820988775979074e-05, | |
| "loss": 0.0415, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.020772669464349747, | |
| "step": 840, | |
| "valid_targets_mean": 1411.6, | |
| "valid_targets_min": 697 | |
| }, | |
| { | |
| "epoch": 4.142506142506143, | |
| "grad_norm": 0.3185476210032366, | |
| "learning_rate": 4.772637185424005e-05, | |
| "loss": 0.0401, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.017333239316940308, | |
| "step": 845, | |
| "valid_targets_mean": 1268.7, | |
| "valid_targets_min": 848 | |
| }, | |
| { | |
| "epoch": 4.167076167076167, | |
| "grad_norm": 0.3080951677636193, | |
| "learning_rate": 4.724306891885134e-05, | |
| "loss": 0.0402, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.017767902463674545, | |
| "step": 850, | |
| "valid_targets_mean": 1320.8, | |
| "valid_targets_min": 628 | |
| }, | |
| { | |
| "epoch": 4.191646191646192, | |
| "grad_norm": 0.3045852265574028, | |
| "learning_rate": 4.6760024224485915e-05, | |
| "loss": 0.0412, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.020958252251148224, | |
| "step": 855, | |
| "valid_targets_mean": 1377.7, | |
| "valid_targets_min": 874 | |
| }, | |
| { | |
| "epoch": 4.216216216216216, | |
| "grad_norm": 0.33276273204262496, | |
| "learning_rate": 4.627728301781569e-05, | |
| "loss": 0.04, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.022474341094493866, | |
| "step": 860, | |
| "valid_targets_mean": 1423.2, | |
| "valid_targets_min": 840 | |
| }, | |
| { | |
| "epoch": 4.240786240786241, | |
| "grad_norm": 0.3173316619927852, | |
| "learning_rate": 4.5794890517084995e-05, | |
| "loss": 0.0404, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.021615831181406975, | |
| "step": 865, | |
| "valid_targets_mean": 1378.6, | |
| "valid_targets_min": 586 | |
| }, | |
| { | |
| "epoch": 4.2653562653562656, | |
| "grad_norm": 0.3367095693491688, | |
| "learning_rate": 4.531289190787493e-05, | |
| "loss": 0.0399, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.022174552083015442, | |
| "step": 870, | |
| "valid_targets_mean": 1354.9, | |
| "valid_targets_min": 613 | |
| }, | |
| { | |
| "epoch": 4.2899262899262895, | |
| "grad_norm": 0.31236014495985, | |
| "learning_rate": 4.483133233887093e-05, | |
| "loss": 0.0416, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.02302752062678337, | |
| "step": 875, | |
| "valid_targets_mean": 1393.2, | |
| "valid_targets_min": 680 | |
| }, | |
| { | |
| "epoch": 4.314496314496314, | |
| "grad_norm": 0.3316531504924653, | |
| "learning_rate": 4.4350256917633585e-05, | |
| "loss": 0.0405, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.021897803992033005, | |
| "step": 880, | |
| "valid_targets_mean": 1357.8, | |
| "valid_targets_min": 709 | |
| }, | |
| { | |
| "epoch": 4.339066339066339, | |
| "grad_norm": 0.31737769102944213, | |
| "learning_rate": 4.386971070637354e-05, | |
| "loss": 0.0419, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.02140195667743683, | |
| "step": 885, | |
| "valid_targets_mean": 1482.6, | |
| "valid_targets_min": 957 | |
| }, | |
| { | |
| "epoch": 4.363636363636363, | |
| "grad_norm": 0.3156768078020588, | |
| "learning_rate": 4.338973871773045e-05, | |
| "loss": 0.0404, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.019348224624991417, | |
| "step": 890, | |
| "valid_targets_mean": 1290.9, | |
| "valid_targets_min": 598 | |
| }, | |
| { | |
| "epoch": 4.388206388206388, | |
| "grad_norm": 0.32838758696752823, | |
| "learning_rate": 4.291038591055668e-05, | |
| "loss": 0.0391, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.018940959125757217, | |
| "step": 895, | |
| "valid_targets_mean": 1317.6, | |
| "valid_targets_min": 652 | |
| }, | |
| { | |
| "epoch": 4.412776412776413, | |
| "grad_norm": 0.3483390290744793, | |
| "learning_rate": 4.243169718570606e-05, | |
| "loss": 0.0419, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.022482123225927353, | |
| "step": 900, | |
| "valid_targets_mean": 1327.1, | |
| "valid_targets_min": 650 | |
| }, | |
| { | |
| "epoch": 4.437346437346437, | |
| "grad_norm": 0.31185423077558533, | |
| "learning_rate": 4.195371738182796e-05, | |
| "loss": 0.0388, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.01987990364432335, | |
| "step": 905, | |
| "valid_targets_mean": 1322.4, | |
| "valid_targets_min": 681 | |
| }, | |
| { | |
| "epoch": 4.461916461916462, | |
| "grad_norm": 0.3520431767140775, | |
| "learning_rate": 4.147649127116735e-05, | |
| "loss": 0.0416, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.021981684491038322, | |
| "step": 910, | |
| "valid_targets_mean": 1278.7, | |
| "valid_targets_min": 651 | |
| }, | |
| { | |
| "epoch": 4.486486486486487, | |
| "grad_norm": 0.3139898395853098, | |
| "learning_rate": 4.1000063555370894e-05, | |
| "loss": 0.0416, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.020710209384560585, | |
| "step": 915, | |
| "valid_targets_mean": 1371.6, | |
| "valid_targets_min": 501 | |
| }, | |
| { | |
| "epoch": 4.511056511056511, | |
| "grad_norm": 0.34028718296899796, | |
| "learning_rate": 4.052447886129986e-05, | |
| "loss": 0.0413, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.022445928305387497, | |
| "step": 920, | |
| "valid_targets_mean": 1329.0, | |
| "valid_targets_min": 686 | |
| }, | |
| { | |
| "epoch": 4.535626535626536, | |
| "grad_norm": 0.34533064643329925, | |
| "learning_rate": 4.004978173684988e-05, | |
| "loss": 0.0421, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.018675517290830612, | |
| "step": 925, | |
| "valid_targets_mean": 1227.1, | |
| "valid_targets_min": 703 | |
| }, | |
| { | |
| "epoch": 4.560196560196561, | |
| "grad_norm": 0.3234418433633803, | |
| "learning_rate": 3.957601664677816e-05, | |
| "loss": 0.0409, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.018068864941596985, | |
| "step": 930, | |
| "valid_targets_mean": 1164.3, | |
| "valid_targets_min": 671 | |
| }, | |
| { | |
| "epoch": 4.584766584766585, | |
| "grad_norm": 0.32817885450102535, | |
| "learning_rate": 3.910322796853848e-05, | |
| "loss": 0.0413, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.023279931396245956, | |
| "step": 935, | |
| "valid_targets_mean": 1372.6, | |
| "valid_targets_min": 770 | |
| }, | |
| { | |
| "epoch": 4.6093366093366095, | |
| "grad_norm": 0.31227498772612844, | |
| "learning_rate": 3.86314599881244e-05, | |
| "loss": 0.041, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.021624624729156494, | |
| "step": 940, | |
| "valid_targets_mean": 1521.3, | |
| "valid_targets_min": 555 | |
| }, | |
| { | |
| "epoch": 4.6339066339066335, | |
| "grad_norm": 0.32927359413012675, | |
| "learning_rate": 3.816075689592095e-05, | |
| "loss": 0.0409, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.02119259163737297, | |
| "step": 945, | |
| "valid_targets_mean": 1311.4, | |
| "valid_targets_min": 670 | |
| }, | |
| { | |
| "epoch": 4.658476658476658, | |
| "grad_norm": 0.3264412276653931, | |
| "learning_rate": 3.7691162782565383e-05, | |
| "loss": 0.0378, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.019723929464817047, | |
| "step": 950, | |
| "valid_targets_mean": 1315.0, | |
| "valid_targets_min": 576 | |
| }, | |
| { | |
| "epoch": 4.683046683046683, | |
| "grad_norm": 0.3109817640497461, | |
| "learning_rate": 3.7222721634817146e-05, | |
| "loss": 0.0405, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.019970854744315147, | |
| "step": 955, | |
| "valid_targets_mean": 1293.0, | |
| "valid_targets_min": 734 | |
| }, | |
| { | |
| "epoch": 4.707616707616707, | |
| "grad_norm": 0.30543197648210796, | |
| "learning_rate": 3.675547733143776e-05, | |
| "loss": 0.0389, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.017612136900424957, | |
| "step": 960, | |
| "valid_targets_mean": 1288.9, | |
| "valid_targets_min": 762 | |
| }, | |
| { | |
| "epoch": 4.732186732186732, | |
| "grad_norm": 0.3211505739622534, | |
| "learning_rate": 3.628947363908058e-05, | |
| "loss": 0.0399, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.019393999129533768, | |
| "step": 965, | |
| "valid_targets_mean": 1484.4, | |
| "valid_targets_min": 863 | |
| }, | |
| { | |
| "epoch": 4.756756756756757, | |
| "grad_norm": 0.33268734426995283, | |
| "learning_rate": 3.582475420819129e-05, | |
| "loss": 0.0395, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.017341013997793198, | |
| "step": 970, | |
| "valid_targets_mean": 1231.2, | |
| "valid_targets_min": 625 | |
| }, | |
| { | |
| "epoch": 4.781326781326781, | |
| "grad_norm": 0.320969799122163, | |
| "learning_rate": 3.53613625689191e-05, | |
| "loss": 0.0399, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.01869683712720871, | |
| "step": 975, | |
| "valid_targets_mean": 1218.4, | |
| "valid_targets_min": 624 | |
| }, | |
| { | |
| "epoch": 4.805896805896806, | |
| "grad_norm": 0.3114776209556068, | |
| "learning_rate": 3.489934212703936e-05, | |
| "loss": 0.0393, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.018647339195013046, | |
| "step": 980, | |
| "valid_targets_mean": 1183.8, | |
| "valid_targets_min": 559 | |
| }, | |
| { | |
| "epoch": 4.830466830466831, | |
| "grad_norm": 0.33522401899380994, | |
| "learning_rate": 3.4438736159887665e-05, | |
| "loss": 0.0381, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.019779419526457787, | |
| "step": 985, | |
| "valid_targets_mean": 1327.4, | |
| "valid_targets_min": 639 | |
| }, | |
| { | |
| "epoch": 4.855036855036855, | |
| "grad_norm": 0.3382381310169906, | |
| "learning_rate": 3.3979587812306196e-05, | |
| "loss": 0.0381, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.01958424225449562, | |
| "step": 990, | |
| "valid_targets_mean": 1366.7, | |
| "valid_targets_min": 822 | |
| }, | |
| { | |
| "epoch": 4.87960687960688, | |
| "grad_norm": 0.3315405268055381, | |
| "learning_rate": 3.352194009260221e-05, | |
| "loss": 0.0401, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.018945585936307907, | |
| "step": 995, | |
| "valid_targets_mean": 1148.9, | |
| "valid_targets_min": 606 | |
| }, | |
| { | |
| "epoch": 4.9041769041769046, | |
| "grad_norm": 0.3115079992976071, | |
| "learning_rate": 3.306583586851956e-05, | |
| "loss": 0.0376, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.021324384957551956, | |
| "step": 1000, | |
| "valid_targets_mean": 1306.4, | |
| "valid_targets_min": 571 | |
| }, | |
| { | |
| "epoch": 4.9287469287469285, | |
| "grad_norm": 0.3078542084967762, | |
| "learning_rate": 3.26113178632233e-05, | |
| "loss": 0.0416, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.02121821790933609, | |
| "step": 1005, | |
| "valid_targets_mean": 1202.2, | |
| "valid_targets_min": 576 | |
| }, | |
| { | |
| "epoch": 4.953316953316953, | |
| "grad_norm": 0.3017240699598429, | |
| "learning_rate": 3.215842865129773e-05, | |
| "loss": 0.0385, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.01800459809601307, | |
| "step": 1010, | |
| "valid_targets_mean": 1340.1, | |
| "valid_targets_min": 587 | |
| }, | |
| { | |
| "epoch": 4.977886977886978, | |
| "grad_norm": 0.3247961752433559, | |
| "learning_rate": 3.1707210654758556e-05, | |
| "loss": 0.038, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.01770874857902527, | |
| "step": 1015, | |
| "valid_targets_mean": 1250.6, | |
| "valid_targets_min": 632 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.36428797184528683, | |
| "learning_rate": 3.125770613907909e-05, | |
| "loss": 0.0364, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.027929894626140594, | |
| "step": 1020, | |
| "valid_targets_mean": 1413.3, | |
| "valid_targets_min": 603 | |
| }, | |
| { | |
| "epoch": 5.024570024570025, | |
| "grad_norm": 0.26133962940041444, | |
| "learning_rate": 3.08099572092314e-05, | |
| "loss": 0.0204, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.010166799649596214, | |
| "step": 1025, | |
| "valid_targets_mean": 1211.6, | |
| "valid_targets_min": 668 | |
| }, | |
| { | |
| "epoch": 5.049140049140049, | |
| "grad_norm": 0.35013474614494267, | |
| "learning_rate": 3.0364005805742246e-05, | |
| "loss": 0.0206, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.010145038366317749, | |
| "step": 1030, | |
| "valid_targets_mean": 1168.4, | |
| "valid_targets_min": 666 | |
| }, | |
| { | |
| "epoch": 5.073710073710074, | |
| "grad_norm": 0.282893581071777, | |
| "learning_rate": 2.9919893700764566e-05, | |
| "loss": 0.0202, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.01128455065190792, | |
| "step": 1035, | |
| "valid_targets_mean": 1338.1, | |
| "valid_targets_min": 741 | |
| }, | |
| { | |
| "epoch": 5.098280098280099, | |
| "grad_norm": 0.28533102889928075, | |
| "learning_rate": 2.9477662494164703e-05, | |
| "loss": 0.02, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.010152880102396011, | |
| "step": 1040, | |
| "valid_targets_mean": 1345.1, | |
| "valid_targets_min": 564 | |
| }, | |
| { | |
| "epoch": 5.122850122850123, | |
| "grad_norm": 0.314478747850382, | |
| "learning_rate": 2.9037353609625695e-05, | |
| "loss": 0.021, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.010847135446965694, | |
| "step": 1045, | |
| "valid_targets_mean": 1294.2, | |
| "valid_targets_min": 680 | |
| }, | |
| { | |
| "epoch": 5.1474201474201475, | |
| "grad_norm": 0.26706110402851246, | |
| "learning_rate": 2.8599008290767204e-05, | |
| "loss": 0.0193, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.009428557008504868, | |
| "step": 1050, | |
| "valid_targets_mean": 1452.9, | |
| "valid_targets_min": 725 | |
| }, | |
| { | |
| "epoch": 5.171990171990172, | |
| "grad_norm": 0.29886945041340757, | |
| "learning_rate": 2.8162667597282176e-05, | |
| "loss": 0.0204, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.012537685222923756, | |
| "step": 1055, | |
| "valid_targets_mean": 1264.2, | |
| "valid_targets_min": 669 | |
| }, | |
| { | |
| "epoch": 5.196560196560196, | |
| "grad_norm": 0.3020986480627111, | |
| "learning_rate": 2.7728372401090806e-05, | |
| "loss": 0.0197, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.010673290118575096, | |
| "step": 1060, | |
| "valid_targets_mean": 1316.6, | |
| "valid_targets_min": 609 | |
| }, | |
| { | |
| "epoch": 5.221130221130221, | |
| "grad_norm": 0.2900262078972821, | |
| "learning_rate": 2.729616338251215e-05, | |
| "loss": 0.0198, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.009456290863454342, | |
| "step": 1065, | |
| "valid_targets_mean": 1233.6, | |
| "valid_targets_min": 736 | |
| }, | |
| { | |
| "epoch": 5.245700245700245, | |
| "grad_norm": 0.3084078147531761, | |
| "learning_rate": 2.686608102645347e-05, | |
| "loss": 0.0196, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.009841931983828545, | |
| "step": 1070, | |
| "valid_targets_mean": 1166.8, | |
| "valid_targets_min": 849 | |
| }, | |
| { | |
| "epoch": 5.27027027027027, | |
| "grad_norm": 0.299437980932929, | |
| "learning_rate": 2.6438165618618127e-05, | |
| "loss": 0.0203, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.011637433432042599, | |
| "step": 1075, | |
| "valid_targets_mean": 1208.6, | |
| "valid_targets_min": 700 | |
| }, | |
| { | |
| "epoch": 5.294840294840295, | |
| "grad_norm": 0.3077946376633561, | |
| "learning_rate": 2.6012457241731986e-05, | |
| "loss": 0.0194, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.01187128946185112, | |
| "step": 1080, | |
| "valid_targets_mean": 1109.1, | |
| "valid_targets_min": 577 | |
| }, | |
| { | |
| "epoch": 5.319410319410319, | |
| "grad_norm": 0.30109678654303274, | |
| "learning_rate": 2.5588995771788942e-05, | |
| "loss": 0.0213, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.01099569071084261, | |
| "step": 1085, | |
| "valid_targets_mean": 1176.6, | |
| "valid_targets_min": 697 | |
| }, | |
| { | |
| "epoch": 5.343980343980344, | |
| "grad_norm": 0.2895876653364538, | |
| "learning_rate": 2.516782087431565e-05, | |
| "loss": 0.0198, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.00905087310820818, | |
| "step": 1090, | |
| "valid_targets_mean": 1213.7, | |
| "valid_targets_min": 680 | |
| }, | |
| { | |
| "epoch": 5.368550368550369, | |
| "grad_norm": 0.3103618365984646, | |
| "learning_rate": 2.474897200065611e-05, | |
| "loss": 0.0198, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.012017014436423779, | |
| "step": 1095, | |
| "valid_targets_mean": 1355.6, | |
| "valid_targets_min": 841 | |
| }, | |
| { | |
| "epoch": 5.393120393120393, | |
| "grad_norm": 0.281975216305729, | |
| "learning_rate": 2.433248838427628e-05, | |
| "loss": 0.0206, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.011380782350897789, | |
| "step": 1100, | |
| "valid_targets_mean": 1249.9, | |
| "valid_targets_min": 690 | |
| }, | |
| { | |
| "epoch": 5.417690417690418, | |
| "grad_norm": 0.25746102969425927, | |
| "learning_rate": 2.3918409037089112e-05, | |
| "loss": 0.0187, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.008737726137042046, | |
| "step": 1105, | |
| "valid_targets_mean": 1287.6, | |
| "valid_targets_min": 650 | |
| }, | |
| { | |
| "epoch": 5.442260442260443, | |
| "grad_norm": 0.3169046454091352, | |
| "learning_rate": 2.3506772745800238e-05, | |
| "loss": 0.0192, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.010114330798387527, | |
| "step": 1110, | |
| "valid_targets_mean": 1159.0, | |
| "valid_targets_min": 619 | |
| }, | |
| { | |
| "epoch": 5.466830466830467, | |
| "grad_norm": 0.2540569088631003, | |
| "learning_rate": 2.309761806827489e-05, | |
| "loss": 0.0183, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.008340870961546898, | |
| "step": 1115, | |
| "valid_targets_mean": 1512.6, | |
| "valid_targets_min": 864 | |
| }, | |
| { | |
| "epoch": 5.4914004914004915, | |
| "grad_norm": 0.26922436587552395, | |
| "learning_rate": 2.2690983329926157e-05, | |
| "loss": 0.019, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.008892050012946129, | |
| "step": 1120, | |
| "valid_targets_mean": 1536.9, | |
| "valid_targets_min": 789 | |
| }, | |
| { | |
| "epoch": 5.515970515970516, | |
| "grad_norm": 0.3138051935360876, | |
| "learning_rate": 2.228690662012514e-05, | |
| "loss": 0.0198, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.011595183052122593, | |
| "step": 1125, | |
| "valid_targets_mean": 1244.7, | |
| "valid_targets_min": 651 | |
| }, | |
| { | |
| "epoch": 5.54054054054054, | |
| "grad_norm": 0.3117819215203409, | |
| "learning_rate": 2.1885425788633e-05, | |
| "loss": 0.0201, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.010435843840241432, | |
| "step": 1130, | |
| "valid_targets_mean": 1302.9, | |
| "valid_targets_min": 711 | |
| }, | |
| { | |
| "epoch": 5.565110565110565, | |
| "grad_norm": 0.3069592336125717, | |
| "learning_rate": 2.1486578442055672e-05, | |
| "loss": 0.02, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.010668829083442688, | |
| "step": 1135, | |
| "valid_targets_mean": 1159.2, | |
| "valid_targets_min": 648 | |
| }, | |
| { | |
| "epoch": 5.58968058968059, | |
| "grad_norm": 0.31656524373785344, | |
| "learning_rate": 2.1090401940321212e-05, | |
| "loss": 0.0181, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.011099237948656082, | |
| "step": 1140, | |
| "valid_targets_mean": 1232.4, | |
| "valid_targets_min": 682 | |
| }, | |
| { | |
| "epoch": 5.614250614250614, | |
| "grad_norm": 0.2670591117298401, | |
| "learning_rate": 2.0696933393180397e-05, | |
| "loss": 0.0176, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.010421942919492722, | |
| "step": 1145, | |
| "valid_targets_mean": 1465.6, | |
| "valid_targets_min": 693 | |
| }, | |
| { | |
| "epoch": 5.638820638820639, | |
| "grad_norm": 0.3182550958234908, | |
| "learning_rate": 2.0306209656730523e-05, | |
| "loss": 0.0187, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.010414216667413712, | |
| "step": 1150, | |
| "valid_targets_mean": 1375.6, | |
| "valid_targets_min": 663 | |
| }, | |
| { | |
| "epoch": 5.663390663390663, | |
| "grad_norm": 0.2662439452158416, | |
| "learning_rate": 1.991826732996319e-05, | |
| "loss": 0.0184, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.009408965706825256, | |
| "step": 1155, | |
| "valid_targets_mean": 1455.5, | |
| "valid_targets_min": 857 | |
| }, | |
| { | |
| "epoch": 5.687960687960688, | |
| "grad_norm": 0.25849619635330695, | |
| "learning_rate": 1.9533142751336126e-05, | |
| "loss": 0.0182, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.008321743458509445, | |
| "step": 1160, | |
| "valid_targets_mean": 1318.8, | |
| "valid_targets_min": 648 | |
| }, | |
| { | |
| "epoch": 5.712530712530713, | |
| "grad_norm": 0.2421276566574057, | |
| "learning_rate": 1.915087199536925e-05, | |
| "loss": 0.0185, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.007764388341456652, | |
| "step": 1165, | |
| "valid_targets_mean": 1301.2, | |
| "valid_targets_min": 673 | |
| }, | |
| { | |
| "epoch": 5.737100737100737, | |
| "grad_norm": 0.2909298663388158, | |
| "learning_rate": 1.8771490869265686e-05, | |
| "loss": 0.0181, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.009419236332178116, | |
| "step": 1170, | |
| "valid_targets_mean": 1192.9, | |
| "valid_targets_min": 690 | |
| }, | |
| { | |
| "epoch": 5.761670761670762, | |
| "grad_norm": 0.26713609394086596, | |
| "learning_rate": 1.839503490955763e-05, | |
| "loss": 0.0176, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.008326014503836632, | |
| "step": 1175, | |
| "valid_targets_mean": 1488.6, | |
| "valid_targets_min": 647 | |
| }, | |
| { | |
| "epoch": 5.7862407862407865, | |
| "grad_norm": 0.26467576781241553, | |
| "learning_rate": 1.802153937877777e-05, | |
| "loss": 0.0183, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.008647764101624489, | |
| "step": 1180, | |
| "valid_targets_mean": 1414.1, | |
| "valid_targets_min": 559 | |
| }, | |
| { | |
| "epoch": 5.8108108108108105, | |
| "grad_norm": 0.26641667371207894, | |
| "learning_rate": 1.7651039262156126e-05, | |
| "loss": 0.0173, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.008846086449921131, | |
| "step": 1185, | |
| "valid_targets_mean": 1301.8, | |
| "valid_targets_min": 709 | |
| }, | |
| { | |
| "epoch": 5.835380835380835, | |
| "grad_norm": 0.31650104270109913, | |
| "learning_rate": 1.728356926434306e-05, | |
| "loss": 0.0178, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.009932863526046276, | |
| "step": 1190, | |
| "valid_targets_mean": 1253.7, | |
| "valid_targets_min": 728 | |
| }, | |
| { | |
| "epoch": 5.85995085995086, | |
| "grad_norm": 0.2836541956422589, | |
| "learning_rate": 1.6919163806158455e-05, | |
| "loss": 0.0184, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.008826677687466145, | |
| "step": 1195, | |
| "valid_targets_mean": 1219.0, | |
| "valid_targets_min": 563 | |
| }, | |
| { | |
| "epoch": 5.884520884520884, | |
| "grad_norm": 0.2943453272730564, | |
| "learning_rate": 1.655785702136764e-05, | |
| "loss": 0.0177, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.009762442670762539, | |
| "step": 1200, | |
| "valid_targets_mean": 1260.2, | |
| "valid_targets_min": 783 | |
| }, | |
| { | |
| "epoch": 5.909090909090909, | |
| "grad_norm": 0.27725462162946896, | |
| "learning_rate": 1.6199682753483926e-05, | |
| "loss": 0.0181, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.007736817002296448, | |
| "step": 1205, | |
| "valid_targets_mean": 1286.1, | |
| "valid_targets_min": 679 | |
| }, | |
| { | |
| "epoch": 5.933660933660933, | |
| "grad_norm": 0.2823492995999207, | |
| "learning_rate": 1.584467455259861e-05, | |
| "loss": 0.0213, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.00895984098315239, | |
| "step": 1210, | |
| "valid_targets_mean": 1235.6, | |
| "valid_targets_min": 770 | |
| }, | |
| { | |
| "epoch": 5.958230958230958, | |
| "grad_norm": 0.274768095515548, | |
| "learning_rate": 1.5492865672238276e-05, | |
| "loss": 0.0178, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.009178702719509602, | |
| "step": 1215, | |
| "valid_targets_mean": 1213.4, | |
| "valid_targets_min": 575 | |
| }, | |
| { | |
| "epoch": 5.982800982800983, | |
| "grad_norm": 0.2571522327732023, | |
| "learning_rate": 1.5144289066250045e-05, | |
| "loss": 0.0178, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.008877310901880264, | |
| "step": 1220, | |
| "valid_targets_mean": 1334.7, | |
| "valid_targets_min": 751 | |
| }, | |
| { | |
| "epoch": 6.004914004914005, | |
| "grad_norm": 0.16174171115393154, | |
| "learning_rate": 1.479897738571468e-05, | |
| "loss": 0.0152, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.004827571101486683, | |
| "step": 1225, | |
| "valid_targets_mean": 1169.8, | |
| "valid_targets_min": 582 | |
| }, | |
| { | |
| "epoch": 6.0294840294840295, | |
| "grad_norm": 0.18569722400832173, | |
| "learning_rate": 1.4456962975888216e-05, | |
| "loss": 0.0078, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0035782852210104465, | |
| "step": 1230, | |
| "valid_targets_mean": 1211.8, | |
| "valid_targets_min": 607 | |
| }, | |
| { | |
| "epoch": 6.054054054054054, | |
| "grad_norm": 0.21491424856633037, | |
| "learning_rate": 1.4118277873172208e-05, | |
| "loss": 0.0081, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.004247656557708979, | |
| "step": 1235, | |
| "valid_targets_mean": 1248.5, | |
| "valid_targets_min": 639 | |
| }, | |
| { | |
| "epoch": 6.078624078624078, | |
| "grad_norm": 0.25016395772751115, | |
| "learning_rate": 1.378295380211289e-05, | |
| "loss": 0.0082, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0046090735122561455, | |
| "step": 1240, | |
| "valid_targets_mean": 1469.5, | |
| "valid_targets_min": 899 | |
| }, | |
| { | |
| "epoch": 6.103194103194103, | |
| "grad_norm": 0.22993854572734307, | |
| "learning_rate": 1.3451022172429495e-05, | |
| "loss": 0.0086, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.004097792319953442, | |
| "step": 1245, | |
| "valid_targets_mean": 1390.2, | |
| "valid_targets_min": 845 | |
| }, | |
| { | |
| "epoch": 6.127764127764128, | |
| "grad_norm": 0.20768384003216256, | |
| "learning_rate": 1.3122514076072163e-05, | |
| "loss": 0.0077, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0044163367711007595, | |
| "step": 1250, | |
| "valid_targets_mean": 1341.2, | |
| "valid_targets_min": 897 | |
| }, | |
| { | |
| "epoch": 6.152334152334152, | |
| "grad_norm": 0.22806257458798243, | |
| "learning_rate": 1.2797460284309532e-05, | |
| "loss": 0.008, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.003973452374339104, | |
| "step": 1255, | |
| "valid_targets_mean": 1167.9, | |
| "valid_targets_min": 715 | |
| }, | |
| { | |
| "epoch": 6.176904176904177, | |
| "grad_norm": 0.18667243280171153, | |
| "learning_rate": 1.247589124484646e-05, | |
| "loss": 0.0076, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.003657914698123932, | |
| "step": 1260, | |
| "valid_targets_mean": 1442.6, | |
| "valid_targets_min": 636 | |
| }, | |
| { | |
| "epoch": 6.201474201474202, | |
| "grad_norm": 0.23820555371148794, | |
| "learning_rate": 1.2157837078971928e-05, | |
| "loss": 0.008, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.003562439698725939, | |
| "step": 1265, | |
| "valid_targets_mean": 1425.5, | |
| "valid_targets_min": 755 | |
| }, | |
| { | |
| "epoch": 6.226044226044226, | |
| "grad_norm": 0.26784606697716024, | |
| "learning_rate": 1.1843327578737612e-05, | |
| "loss": 0.0084, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.005037506110966206, | |
| "step": 1270, | |
| "valid_targets_mean": 1198.7, | |
| "valid_targets_min": 726 | |
| }, | |
| { | |
| "epoch": 6.250614250614251, | |
| "grad_norm": 0.21603907352417645, | |
| "learning_rate": 1.1532392204167275e-05, | |
| "loss": 0.0075, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0038490337319672108, | |
| "step": 1275, | |
| "valid_targets_mean": 1141.4, | |
| "valid_targets_min": 698 | |
| }, | |
| { | |
| "epoch": 6.275184275184275, | |
| "grad_norm": 0.2346178380054373, | |
| "learning_rate": 1.1225060080497257e-05, | |
| "loss": 0.0081, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.004704365506768227, | |
| "step": 1280, | |
| "valid_targets_mean": 1300.5, | |
| "valid_targets_min": 577 | |
| }, | |
| { | |
| "epoch": 6.2997542997543, | |
| "grad_norm": 0.18637031994574188, | |
| "learning_rate": 1.092135999544831e-05, | |
| "loss": 0.0075, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0033987853676080704, | |
| "step": 1285, | |
| "valid_targets_mean": 1284.4, | |
| "valid_targets_min": 760 | |
| }, | |
| { | |
| "epoch": 6.324324324324325, | |
| "grad_norm": 0.24995090437047596, | |
| "learning_rate": 1.0621320396529056e-05, | |
| "loss": 0.0084, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.004618373699486256, | |
| "step": 1290, | |
| "valid_targets_mean": 1206.5, | |
| "valid_targets_min": 633 | |
| }, | |
| { | |
| "epoch": 6.348894348894349, | |
| "grad_norm": 0.218872542773124, | |
| "learning_rate": 1.0324969388371364e-05, | |
| "loss": 0.0077, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.004193416330963373, | |
| "step": 1295, | |
| "valid_targets_mean": 1309.2, | |
| "valid_targets_min": 728 | |
| }, | |
| { | |
| "epoch": 6.3734643734643734, | |
| "grad_norm": 0.19603164940260448, | |
| "learning_rate": 1.0032334730097715e-05, | |
| "loss": 0.0078, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.003843929385766387, | |
| "step": 1300, | |
| "valid_targets_mean": 1404.2, | |
| "valid_targets_min": 675 | |
| }, | |
| { | |
| "epoch": 6.398034398034398, | |
| "grad_norm": 0.20046639841749483, | |
| "learning_rate": 9.743443832721055e-06, | |
| "loss": 0.0079, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.004076910670846701, | |
| "step": 1305, | |
| "valid_targets_mean": 1295.1, | |
| "valid_targets_min": 601 | |
| }, | |
| { | |
| "epoch": 6.422604422604422, | |
| "grad_norm": 0.21634516207911922, | |
| "learning_rate": 9.458323756577264e-06, | |
| "loss": 0.0076, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0035616583190858364, | |
| "step": 1310, | |
| "valid_targets_mean": 1419.7, | |
| "valid_targets_min": 820 | |
| }, | |
| { | |
| "epoch": 6.447174447174447, | |
| "grad_norm": 0.1920680703820834, | |
| "learning_rate": 9.17700120879031e-06, | |
| "loss": 0.007, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.003503095591440797, | |
| "step": 1315, | |
| "valid_targets_mean": 1528.4, | |
| "valid_targets_min": 964 | |
| }, | |
| { | |
| "epoch": 6.471744471744472, | |
| "grad_norm": 0.22880019301249582, | |
| "learning_rate": 8.899502540770688e-06, | |
| "loss": 0.008, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0037546579260379076, | |
| "step": 1320, | |
| "valid_targets_mean": 1280.2, | |
| "valid_targets_min": 633 | |
| }, | |
| { | |
| "epoch": 6.496314496314496, | |
| "grad_norm": 0.2514546590455702, | |
| "learning_rate": 8.625853745747048e-06, | |
| "loss": 0.0074, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.004576146602630615, | |
| "step": 1325, | |
| "valid_targets_mean": 1257.1, | |
| "valid_targets_min": 575 | |
| }, | |
| { | |
| "epoch": 6.520884520884521, | |
| "grad_norm": 0.2554072646178069, | |
| "learning_rate": 8.35608045633145e-06, | |
| "loss": 0.0073, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0029603377915918827, | |
| "step": 1330, | |
| "valid_targets_mean": 1260.9, | |
| "valid_targets_min": 694 | |
| }, | |
| { | |
| "epoch": 6.545454545454545, | |
| "grad_norm": 0.215953034629028, | |
| "learning_rate": 8.090207942118333e-06, | |
| "loss": 0.0076, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.00399182690307498, | |
| "step": 1335, | |
| "valid_targets_mean": 1512.6, | |
| "valid_targets_min": 622 | |
| }, | |
| { | |
| "epoch": 6.57002457002457, | |
| "grad_norm": 0.2388743990811728, | |
| "learning_rate": 7.82826110731752e-06, | |
| "loss": 0.0074, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.004739910364151001, | |
| "step": 1340, | |
| "valid_targets_mean": 1261.9, | |
| "valid_targets_min": 550 | |
| }, | |
| { | |
| "epoch": 6.594594594594595, | |
| "grad_norm": 0.19693613080448677, | |
| "learning_rate": 7.570264488421447e-06, | |
| "loss": 0.0075, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0029330942779779434, | |
| "step": 1345, | |
| "valid_targets_mean": 1331.1, | |
| "valid_targets_min": 786 | |
| }, | |
| { | |
| "epoch": 6.61916461916462, | |
| "grad_norm": 0.21706119299314372, | |
| "learning_rate": 7.3162422519068966e-06, | |
| "loss": 0.0076, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0039455159567296505, | |
| "step": 1350, | |
| "valid_targets_mean": 1311.2, | |
| "valid_targets_min": 614 | |
| }, | |
| { | |
| "epoch": 6.643734643734644, | |
| "grad_norm": 0.169159406905177, | |
| "learning_rate": 7.066218191971219e-06, | |
| "loss": 0.0073, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0025496738962829113, | |
| "step": 1355, | |
| "valid_targets_mean": 1424.5, | |
| "valid_targets_min": 732 | |
| }, | |
| { | |
| "epoch": 6.6683046683046685, | |
| "grad_norm": 0.2122964661186571, | |
| "learning_rate": 6.820215728303625e-06, | |
| "loss": 0.0071, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0036278744228184223, | |
| "step": 1360, | |
| "valid_targets_mean": 1245.1, | |
| "valid_targets_min": 629 | |
| }, | |
| { | |
| "epoch": 6.6928746928746925, | |
| "grad_norm": 0.20634084259782276, | |
| "learning_rate": 6.578257903891427e-06, | |
| "loss": 0.0066, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.003295808332040906, | |
| "step": 1365, | |
| "valid_targets_mean": 1429.2, | |
| "valid_targets_min": 730 | |
| }, | |
| { | |
| "epoch": 6.717444717444717, | |
| "grad_norm": 0.21991786675018984, | |
| "learning_rate": 6.34036738286165e-06, | |
| "loss": 0.0071, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0043110596016049385, | |
| "step": 1370, | |
| "valid_targets_mean": 1328.5, | |
| "valid_targets_min": 693 | |
| }, | |
| { | |
| "epoch": 6.742014742014742, | |
| "grad_norm": 0.24788989146170717, | |
| "learning_rate": 6.106566448358025e-06, | |
| "loss": 0.0071, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.004148700274527073, | |
| "step": 1375, | |
| "valid_targets_mean": 1244.7, | |
| "valid_targets_min": 796 | |
| }, | |
| { | |
| "epoch": 6.766584766584766, | |
| "grad_norm": 0.20977102960147215, | |
| "learning_rate": 5.8768770004537894e-06, | |
| "loss": 0.007, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0038808349054306746, | |
| "step": 1380, | |
| "valid_targets_mean": 1229.1, | |
| "valid_targets_min": 679 | |
| }, | |
| { | |
| "epoch": 6.791154791154791, | |
| "grad_norm": 0.18128751410719413, | |
| "learning_rate": 5.65132055410027e-06, | |
| "loss": 0.0065, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.003224300919100642, | |
| "step": 1385, | |
| "valid_targets_mean": 1343.8, | |
| "valid_targets_min": 740 | |
| }, | |
| { | |
| "epoch": 6.815724815724816, | |
| "grad_norm": 0.2243474366269306, | |
| "learning_rate": 5.429918237111642e-06, | |
| "loss": 0.0059, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0031104448717087507, | |
| "step": 1390, | |
| "valid_targets_mean": 1281.1, | |
| "valid_targets_min": 508 | |
| }, | |
| { | |
| "epoch": 6.84029484029484, | |
| "grad_norm": 0.22158595978990048, | |
| "learning_rate": 5.21269078818582e-06, | |
| "loss": 0.0067, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0033817810472100973, | |
| "step": 1395, | |
| "valid_targets_mean": 1257.1, | |
| "valid_targets_min": 840 | |
| }, | |
| { | |
| "epoch": 6.864864864864865, | |
| "grad_norm": 0.23250133634103248, | |
| "learning_rate": 4.999658554961917e-06, | |
| "loss": 0.0094, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.004702032543718815, | |
| "step": 1400, | |
| "valid_targets_mean": 1354.7, | |
| "valid_targets_min": 555 | |
| }, | |
| { | |
| "epoch": 6.88943488943489, | |
| "grad_norm": 0.22241489932962594, | |
| "learning_rate": 4.790841492114256e-06, | |
| "loss": 0.0073, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.003928256221115589, | |
| "step": 1405, | |
| "valid_targets_mean": 1285.6, | |
| "valid_targets_min": 773 | |
| }, | |
| { | |
| "epoch": 6.914004914004914, | |
| "grad_norm": 0.22390574620767395, | |
| "learning_rate": 4.586259159483286e-06, | |
| "loss": 0.0063, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.003446041140705347, | |
| "step": 1410, | |
| "valid_targets_mean": 1231.8, | |
| "valid_targets_min": 697 | |
| }, | |
| { | |
| "epoch": 6.938574938574939, | |
| "grad_norm": 0.23327794938321245, | |
| "learning_rate": 4.385930720243314e-06, | |
| "loss": 0.0067, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0034866356290876865, | |
| "step": 1415, | |
| "valid_targets_mean": 1175.0, | |
| "valid_targets_min": 801 | |
| }, | |
| { | |
| "epoch": 6.963144963144963, | |
| "grad_norm": 0.22248429253719407, | |
| "learning_rate": 4.189874939107574e-06, | |
| "loss": 0.007, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0036289298441261053, | |
| "step": 1420, | |
| "valid_targets_mean": 1280.1, | |
| "valid_targets_min": 671 | |
| }, | |
| { | |
| "epoch": 6.987714987714988, | |
| "grad_norm": 0.18240933384590993, | |
| "learning_rate": 3.998110180570525e-06, | |
| "loss": 0.0061, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0025913119316101074, | |
| "step": 1425, | |
| "valid_targets_mean": 1412.8, | |
| "valid_targets_min": 613 | |
| }, | |
| { | |
| "epoch": 7.00982800982801, | |
| "grad_norm": 0.11518082939815999, | |
| "learning_rate": 3.810654407187636e-06, | |
| "loss": 0.005, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.001377458916977048, | |
| "step": 1430, | |
| "valid_targets_mean": 1409.6, | |
| "valid_targets_min": 800 | |
| }, | |
| { | |
| "epoch": 7.034398034398034, | |
| "grad_norm": 0.11366256392530506, | |
| "learning_rate": 3.6275251778928487e-06, | |
| "loss": 0.0029, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0015742455143481493, | |
| "step": 1435, | |
| "valid_targets_mean": 1455.5, | |
| "valid_targets_min": 999 | |
| }, | |
| { | |
| "epoch": 7.058968058968059, | |
| "grad_norm": 0.10653619054329262, | |
| "learning_rate": 3.4487396463538215e-06, | |
| "loss": 0.0027, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.001220267964527011, | |
| "step": 1440, | |
| "valid_targets_mean": 1301.7, | |
| "valid_targets_min": 686 | |
| }, | |
| { | |
| "epoch": 7.083538083538084, | |
| "grad_norm": 0.11194152489721383, | |
| "learning_rate": 3.2743145593652047e-06, | |
| "loss": 0.003, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0016661153640598059, | |
| "step": 1445, | |
| "valid_targets_mean": 1376.6, | |
| "valid_targets_min": 725 | |
| }, | |
| { | |
| "epoch": 7.108108108108108, | |
| "grad_norm": 0.12343103375814024, | |
| "learning_rate": 3.1042662552798975e-06, | |
| "loss": 0.0028, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0012464872561395168, | |
| "step": 1450, | |
| "valid_targets_mean": 1389.7, | |
| "valid_targets_min": 626 | |
| }, | |
| { | |
| "epoch": 7.132678132678133, | |
| "grad_norm": 0.12753242419982108, | |
| "learning_rate": 2.9386106624786804e-06, | |
| "loss": 0.0027, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0013661307748407125, | |
| "step": 1455, | |
| "valid_targets_mean": 1296.8, | |
| "valid_targets_min": 716 | |
| }, | |
| { | |
| "epoch": 7.157248157248158, | |
| "grad_norm": 0.14659785771116587, | |
| "learning_rate": 2.7773632978781936e-06, | |
| "loss": 0.0026, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0012009042548015714, | |
| "step": 1460, | |
| "valid_targets_mean": 1220.9, | |
| "valid_targets_min": 639 | |
| }, | |
| { | |
| "epoch": 7.181818181818182, | |
| "grad_norm": 0.11218152706881185, | |
| "learning_rate": 2.620539265477512e-06, | |
| "loss": 0.0025, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0011191873345524073, | |
| "step": 1465, | |
| "valid_targets_mean": 1306.1, | |
| "valid_targets_min": 873 | |
| }, | |
| { | |
| "epoch": 7.2063882063882065, | |
| "grad_norm": 0.12983333466333855, | |
| "learning_rate": 2.468153254943284e-06, | |
| "loss": 0.0026, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0014806350227445364, | |
| "step": 1470, | |
| "valid_targets_mean": 1267.2, | |
| "valid_targets_min": 700 | |
| }, | |
| { | |
| "epoch": 7.2309582309582305, | |
| "grad_norm": 0.08990504914273968, | |
| "learning_rate": 2.32021954023382e-06, | |
| "loss": 0.0028, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.001308899256400764, | |
| "step": 1475, | |
| "valid_targets_mean": 1533.7, | |
| "valid_targets_min": 779 | |
| }, | |
| { | |
| "epoch": 7.255528255528255, | |
| "grad_norm": 0.14187065725212764, | |
| "learning_rate": 2.1767519782620095e-06, | |
| "loss": 0.0028, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.001910665538161993, | |
| "step": 1480, | |
| "valid_targets_mean": 1316.4, | |
| "valid_targets_min": 762 | |
| }, | |
| { | |
| "epoch": 7.28009828009828, | |
| "grad_norm": 0.13580002508297487, | |
| "learning_rate": 2.0377640075973926e-06, | |
| "loss": 0.0028, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0013501509092748165, | |
| "step": 1485, | |
| "valid_targets_mean": 1464.1, | |
| "valid_targets_min": 734 | |
| }, | |
| { | |
| "epoch": 7.304668304668304, | |
| "grad_norm": 0.11703590507552636, | |
| "learning_rate": 1.903268647207329e-06, | |
| "loss": 0.0024, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.000767023244407028, | |
| "step": 1490, | |
| "valid_targets_mean": 1434.1, | |
| "valid_targets_min": 738 | |
| }, | |
| { | |
| "epoch": 7.329238329238329, | |
| "grad_norm": 0.1259216278943979, | |
| "learning_rate": 1.7732784952375236e-06, | |
| "loss": 0.0027, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0014803120866417885, | |
| "step": 1495, | |
| "valid_targets_mean": 1378.4, | |
| "valid_targets_min": 797 | |
| }, | |
| { | |
| "epoch": 7.353808353808354, | |
| "grad_norm": 0.11355256723687553, | |
| "learning_rate": 1.6478057278319914e-06, | |
| "loss": 0.0026, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0012061558663845062, | |
| "step": 1500, | |
| "valid_targets_mean": 1271.9, | |
| "valid_targets_min": 720 | |
| }, | |
| { | |
| "epoch": 7.378378378378378, | |
| "grad_norm": 0.11340763399720942, | |
| "learning_rate": 1.5268620979924986e-06, | |
| "loss": 0.0026, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0010582938557490706, | |
| "step": 1505, | |
| "valid_targets_mean": 1201.9, | |
| "valid_targets_min": 801 | |
| }, | |
| { | |
| "epoch": 7.402948402948403, | |
| "grad_norm": 0.12412291642290835, | |
| "learning_rate": 1.4104589344776542e-06, | |
| "loss": 0.0024, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0015159003669396043, | |
| "step": 1510, | |
| "valid_targets_mean": 1675.2, | |
| "valid_targets_min": 937 | |
| }, | |
| { | |
| "epoch": 7.427518427518428, | |
| "grad_norm": 0.13669679018868425, | |
| "learning_rate": 1.2986071407417533e-06, | |
| "loss": 0.0024, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.001106357667595148, | |
| "step": 1515, | |
| "valid_targets_mean": 1426.0, | |
| "valid_targets_min": 592 | |
| }, | |
| { | |
| "epoch": 7.452088452088452, | |
| "grad_norm": 0.12301018603265855, | |
| "learning_rate": 1.1913171939134659e-06, | |
| "loss": 0.0024, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.001434761332347989, | |
| "step": 1520, | |
| "valid_targets_mean": 1361.7, | |
| "valid_targets_min": 659 | |
| }, | |
| { | |
| "epoch": 7.476658476658477, | |
| "grad_norm": 0.11562603789453654, | |
| "learning_rate": 1.0885991438144448e-06, | |
| "loss": 0.0028, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0013514563906937838, | |
| "step": 1525, | |
| "valid_targets_mean": 1447.6, | |
| "valid_targets_min": 959 | |
| }, | |
| { | |
| "epoch": 7.501228501228502, | |
| "grad_norm": 0.11631784496079337, | |
| "learning_rate": 9.904626120179505e-07, | |
| "loss": 0.0025, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.001590099884197116, | |
| "step": 1530, | |
| "valid_targets_mean": 1350.9, | |
| "valid_targets_min": 590 | |
| }, | |
| { | |
| "epoch": 7.525798525798526, | |
| "grad_norm": 0.12342575829593498, | |
| "learning_rate": 8.969167909475939e-07, | |
| "loss": 0.0025, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.002005585702136159, | |
| "step": 1535, | |
| "valid_targets_mean": 1383.1, | |
| "valid_targets_min": 891 | |
| }, | |
| { | |
| "epoch": 7.5503685503685505, | |
| "grad_norm": 0.1377112054055627, | |
| "learning_rate": 8.079704430163204e-07, | |
| "loss": 0.0025, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.001176726073026657, | |
| "step": 1540, | |
| "valid_targets_mean": 1381.1, | |
| "valid_targets_min": 819 | |
| }, | |
| { | |
| "epoch": 7.5749385749385745, | |
| "grad_norm": 0.12267343278939448, | |
| "learning_rate": 7.23631899805588e-07, | |
| "loss": 0.0026, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.001982441172003746, | |
| "step": 1545, | |
| "valid_targets_mean": 1264.6, | |
| "valid_targets_min": 563 | |
| }, | |
| { | |
| "epoch": 7.599508599508599, | |
| "grad_norm": 0.1232331023445151, | |
| "learning_rate": 6.439090612849863e-07, | |
| "loss": 0.0025, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0012900140136480331, | |
| "step": 1550, | |
| "valid_targets_mean": 1392.8, | |
| "valid_targets_min": 744 | |
| }, | |
| { | |
| "epoch": 7.624078624078624, | |
| "grad_norm": 0.12399612774127011, | |
| "learning_rate": 5.688093950722451e-07, | |
| "loss": 0.0025, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0012250710278749466, | |
| "step": 1555, | |
| "valid_targets_mean": 1380.9, | |
| "valid_targets_min": 823 | |
| }, | |
| { | |
| "epoch": 7.648648648648649, | |
| "grad_norm": 0.10093776597270249, | |
| "learning_rate": 4.983399357337215e-07, | |
| "loss": 0.0024, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0011251834221184254, | |
| "step": 1560, | |
| "valid_targets_mean": 1338.3, | |
| "valid_targets_min": 745 | |
| }, | |
| { | |
| "epoch": 7.673218673218673, | |
| "grad_norm": 0.14023903435720927, | |
| "learning_rate": 4.32507284125494e-07, | |
| "loss": 0.0026, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0011913267662748694, | |
| "step": 1565, | |
| "valid_targets_mean": 1174.8, | |
| "valid_targets_min": 654 | |
| }, | |
| { | |
| "epoch": 7.697788697788698, | |
| "grad_norm": 0.11347502220023985, | |
| "learning_rate": 3.7131760677505676e-07, | |
| "loss": 0.0023, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0011188078206032515, | |
| "step": 1570, | |
| "valid_targets_mean": 1516.1, | |
| "valid_targets_min": 749 | |
| }, | |
| { | |
| "epoch": 7.722358722358722, | |
| "grad_norm": 0.09825121141173124, | |
| "learning_rate": 3.1477663530371514e-07, | |
| "loss": 0.0021, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.001625724951736629, | |
| "step": 1575, | |
| "valid_targets_mean": 1451.0, | |
| "valid_targets_min": 828 | |
| }, | |
| { | |
| "epoch": 7.746928746928747, | |
| "grad_norm": 0.15120852978627652, | |
| "learning_rate": 2.6288966588967623e-07, | |
| "loss": 0.0026, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0013657582458108664, | |
| "step": 1580, | |
| "valid_targets_mean": 1270.2, | |
| "valid_targets_min": 615 | |
| }, | |
| { | |
| "epoch": 7.771498771498772, | |
| "grad_norm": 0.09585305972292815, | |
| "learning_rate": 2.1566155877197903e-07, | |
| "loss": 0.0025, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0007987943245097995, | |
| "step": 1585, | |
| "valid_targets_mean": 1350.7, | |
| "valid_targets_min": 609 | |
| }, | |
| { | |
| "epoch": 7.796068796068796, | |
| "grad_norm": 0.13596142729213964, | |
| "learning_rate": 1.7309673779524194e-07, | |
| "loss": 0.0023, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.001020238851197064, | |
| "step": 1590, | |
| "valid_targets_mean": 1383.6, | |
| "valid_targets_min": 812 | |
| }, | |
| { | |
| "epoch": 7.820638820638821, | |
| "grad_norm": 0.10746814374827729, | |
| "learning_rate": 1.3519918999526648e-07, | |
| "loss": 0.0023, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0010407338850200176, | |
| "step": 1595, | |
| "valid_targets_mean": 1373.1, | |
| "valid_targets_min": 605 | |
| }, | |
| { | |
| "epoch": 7.8452088452088455, | |
| "grad_norm": 0.1366405061352433, | |
| "learning_rate": 1.0197246522557491e-07, | |
| "loss": 0.0026, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.001199243706651032, | |
| "step": 1600, | |
| "valid_targets_mean": 1318.0, | |
| "valid_targets_min": 619 | |
| }, | |
| { | |
| "epoch": 7.8697788697788695, | |
| "grad_norm": 0.15509314338643704, | |
| "learning_rate": 7.34196758249095e-08, | |
| "loss": 0.0038, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0018726876005530357, | |
| "step": 1605, | |
| "valid_targets_mean": 1289.9, | |
| "valid_targets_min": 800 | |
| }, | |
| { | |
| "epoch": 7.894348894348894, | |
| "grad_norm": 0.11868071753447657, | |
| "learning_rate": 4.9543496325693553e-08, | |
| "loss": 0.0022, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0009430464124307036, | |
| "step": 1610, | |
| "valid_targets_mean": 1305.1, | |
| "valid_targets_min": 719 | |
| }, | |
| { | |
| "epoch": 7.918918918918919, | |
| "grad_norm": 0.10845984516950594, | |
| "learning_rate": 3.034616320349293e-08, | |
| "loss": 0.0025, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0009824966546148062, | |
| "step": 1615, | |
| "valid_targets_mean": 1196.1, | |
| "valid_targets_min": 518 | |
| }, | |
| { | |
| "epoch": 7.943488943488943, | |
| "grad_norm": 0.1252822512833604, | |
| "learning_rate": 1.58294746675558e-08, | |
| "loss": 0.0025, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0012963269837200642, | |
| "step": 1620, | |
| "valid_targets_mean": 1239.3, | |
| "valid_targets_min": 587 | |
| }, | |
| { | |
| "epoch": 7.968058968058968, | |
| "grad_norm": 0.113750323811112, | |
| "learning_rate": 5.994790492352964e-09, | |
| "loss": 0.0025, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0010778913274407387, | |
| "step": 1625, | |
| "valid_targets_mean": 1516.6, | |
| "valid_targets_min": 668 | |
| }, | |
| { | |
| "epoch": 7.992628992628992, | |
| "grad_norm": 0.11051935412163597, | |
| "learning_rate": 8.43031890213064e-10, | |
| "loss": 0.0024, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.001104144612327218, | |
| "step": 1630, | |
| "valid_targets_mean": 1378.2, | |
| "valid_targets_min": 825 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0019607325084507465, | |
| "step": 1632, | |
| "total_flos": 3.419079710112481e+17, | |
| "train_loss": 0.08937609711645957, | |
| "train_runtime": 8905.9814, | |
| "train_samples_per_second": 5.841, | |
| "train_steps_per_second": 0.183, | |
| "valid_targets_mean": 1431.2, | |
| "valid_targets_min": 619 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 1632, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 8, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.419079710112481e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |