| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 7.0, | |
| "eval_steps": 500, | |
| "global_step": 1379, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.025380710659898477, | |
| "grad_norm": 8.746054092048038, | |
| "learning_rate": 1.1594202898550726e-06, | |
| "loss": 0.8448, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.46080702543258667, | |
| "step": 5, | |
| "valid_targets_mean": 3391.9, | |
| "valid_targets_min": 986 | |
| }, | |
| { | |
| "epoch": 0.050761421319796954, | |
| "grad_norm": 8.271188445211049, | |
| "learning_rate": 2.6086956521739132e-06, | |
| "loss": 0.8643, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.522148847579956, | |
| "step": 10, | |
| "valid_targets_mean": 2433.4, | |
| "valid_targets_min": 1473 | |
| }, | |
| { | |
| "epoch": 0.07614213197969544, | |
| "grad_norm": 4.513541603908623, | |
| "learning_rate": 4.057971014492754e-06, | |
| "loss": 0.8132, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36975154280662537, | |
| "step": 15, | |
| "valid_targets_mean": 2374.4, | |
| "valid_targets_min": 1212 | |
| }, | |
| { | |
| "epoch": 0.10152284263959391, | |
| "grad_norm": 2.9836745456488174, | |
| "learning_rate": 5.507246376811595e-06, | |
| "loss": 0.7417, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31274423003196716, | |
| "step": 20, | |
| "valid_targets_mean": 2366.5, | |
| "valid_targets_min": 733 | |
| }, | |
| { | |
| "epoch": 0.12690355329949238, | |
| "grad_norm": 2.065127504471685, | |
| "learning_rate": 6.956521739130435e-06, | |
| "loss": 0.735, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3327901363372803, | |
| "step": 25, | |
| "valid_targets_mean": 2663.6, | |
| "valid_targets_min": 1324 | |
| }, | |
| { | |
| "epoch": 0.15228426395939088, | |
| "grad_norm": 1.4643063861293897, | |
| "learning_rate": 8.405797101449275e-06, | |
| "loss": 0.6935, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35331571102142334, | |
| "step": 30, | |
| "valid_targets_mean": 2389.2, | |
| "valid_targets_min": 1040 | |
| }, | |
| { | |
| "epoch": 0.17766497461928935, | |
| "grad_norm": 1.3378539239384706, | |
| "learning_rate": 9.855072463768118e-06, | |
| "loss": 0.626, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29063376784324646, | |
| "step": 35, | |
| "valid_targets_mean": 2173.9, | |
| "valid_targets_min": 1262 | |
| }, | |
| { | |
| "epoch": 0.20304568527918782, | |
| "grad_norm": 0.9665513277849, | |
| "learning_rate": 1.1304347826086957e-05, | |
| "loss": 0.6444, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21201933920383453, | |
| "step": 40, | |
| "valid_targets_mean": 2408.5, | |
| "valid_targets_min": 1394 | |
| }, | |
| { | |
| "epoch": 0.22842639593908629, | |
| "grad_norm": 0.8987538258480249, | |
| "learning_rate": 1.2753623188405797e-05, | |
| "loss": 0.5968, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21246054768562317, | |
| "step": 45, | |
| "valid_targets_mean": 2066.0, | |
| "valid_targets_min": 1067 | |
| }, | |
| { | |
| "epoch": 0.25380710659898476, | |
| "grad_norm": 0.9020500925340328, | |
| "learning_rate": 1.420289855072464e-05, | |
| "loss": 0.5712, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18394096195697784, | |
| "step": 50, | |
| "valid_targets_mean": 1613.1, | |
| "valid_targets_min": 1058 | |
| }, | |
| { | |
| "epoch": 0.27918781725888325, | |
| "grad_norm": 0.8478913125784002, | |
| "learning_rate": 1.565217391304348e-05, | |
| "loss": 0.5423, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3302842974662781, | |
| "step": 55, | |
| "valid_targets_mean": 3342.5, | |
| "valid_targets_min": 1549 | |
| }, | |
| { | |
| "epoch": 0.30456852791878175, | |
| "grad_norm": 1.5227996339853616, | |
| "learning_rate": 1.710144927536232e-05, | |
| "loss": 0.5736, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34866365790367126, | |
| "step": 60, | |
| "valid_targets_mean": 2464.1, | |
| "valid_targets_min": 250 | |
| }, | |
| { | |
| "epoch": 0.3299492385786802, | |
| "grad_norm": 0.9365313312981728, | |
| "learning_rate": 1.8550724637681162e-05, | |
| "loss": 0.5296, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3394869863986969, | |
| "step": 65, | |
| "valid_targets_mean": 2791.2, | |
| "valid_targets_min": 984 | |
| }, | |
| { | |
| "epoch": 0.3553299492385787, | |
| "grad_norm": 0.8161086659169139, | |
| "learning_rate": 2e-05, | |
| "loss": 0.5151, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3421684801578522, | |
| "step": 70, | |
| "valid_targets_mean": 3055.1, | |
| "valid_targets_min": 959 | |
| }, | |
| { | |
| "epoch": 0.38071065989847713, | |
| "grad_norm": 0.7755427124666795, | |
| "learning_rate": 2.1449275362318844e-05, | |
| "loss": 0.5267, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26595619320869446, | |
| "step": 75, | |
| "valid_targets_mean": 2610.0, | |
| "valid_targets_min": 1367 | |
| }, | |
| { | |
| "epoch": 0.40609137055837563, | |
| "grad_norm": 0.8301275472311378, | |
| "learning_rate": 2.2898550724637684e-05, | |
| "loss": 0.534, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2656007409095764, | |
| "step": 80, | |
| "valid_targets_mean": 2173.9, | |
| "valid_targets_min": 1076 | |
| }, | |
| { | |
| "epoch": 0.43147208121827413, | |
| "grad_norm": 0.7430341766697183, | |
| "learning_rate": 2.4347826086956526e-05, | |
| "loss": 0.5021, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20115895569324493, | |
| "step": 85, | |
| "valid_targets_mean": 2819.4, | |
| "valid_targets_min": 1118 | |
| }, | |
| { | |
| "epoch": 0.45685279187817257, | |
| "grad_norm": 0.8667322471672515, | |
| "learning_rate": 2.5797101449275362e-05, | |
| "loss": 0.4829, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19809472560882568, | |
| "step": 90, | |
| "valid_targets_mean": 1588.8, | |
| "valid_targets_min": 1052 | |
| }, | |
| { | |
| "epoch": 0.48223350253807107, | |
| "grad_norm": 0.8666224198147493, | |
| "learning_rate": 2.7246376811594205e-05, | |
| "loss": 0.4635, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23502573370933533, | |
| "step": 95, | |
| "valid_targets_mean": 2046.4, | |
| "valid_targets_min": 1040 | |
| }, | |
| { | |
| "epoch": 0.5076142131979695, | |
| "grad_norm": 0.7163108956357823, | |
| "learning_rate": 2.8695652173913044e-05, | |
| "loss": 0.4353, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2423144280910492, | |
| "step": 100, | |
| "valid_targets_mean": 3183.2, | |
| "valid_targets_min": 795 | |
| }, | |
| { | |
| "epoch": 0.5329949238578681, | |
| "grad_norm": 0.865566100726533, | |
| "learning_rate": 3.0144927536231887e-05, | |
| "loss": 0.4693, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15293586254119873, | |
| "step": 105, | |
| "valid_targets_mean": 1570.0, | |
| "valid_targets_min": 443 | |
| }, | |
| { | |
| "epoch": 0.5583756345177665, | |
| "grad_norm": 0.9232612201171461, | |
| "learning_rate": 3.1594202898550726e-05, | |
| "loss": 0.4865, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24900048971176147, | |
| "step": 110, | |
| "valid_targets_mean": 2685.6, | |
| "valid_targets_min": 1052 | |
| }, | |
| { | |
| "epoch": 0.583756345177665, | |
| "grad_norm": 0.7835292686525868, | |
| "learning_rate": 3.304347826086957e-05, | |
| "loss": 0.4911, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25293511152267456, | |
| "step": 115, | |
| "valid_targets_mean": 2541.1, | |
| "valid_targets_min": 1027 | |
| }, | |
| { | |
| "epoch": 0.6091370558375635, | |
| "grad_norm": 0.7820807679495676, | |
| "learning_rate": 3.449275362318841e-05, | |
| "loss": 0.4888, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20399141311645508, | |
| "step": 120, | |
| "valid_targets_mean": 2334.9, | |
| "valid_targets_min": 801 | |
| }, | |
| { | |
| "epoch": 0.6345177664974619, | |
| "grad_norm": 0.7995301620599062, | |
| "learning_rate": 3.594202898550725e-05, | |
| "loss": 0.4849, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23064780235290527, | |
| "step": 125, | |
| "valid_targets_mean": 2171.4, | |
| "valid_targets_min": 360 | |
| }, | |
| { | |
| "epoch": 0.6598984771573604, | |
| "grad_norm": 0.6963743946680525, | |
| "learning_rate": 3.739130434782609e-05, | |
| "loss": 0.4522, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18462923169136047, | |
| "step": 130, | |
| "valid_targets_mean": 3114.9, | |
| "valid_targets_min": 1203 | |
| }, | |
| { | |
| "epoch": 0.6852791878172588, | |
| "grad_norm": 0.8741657110892967, | |
| "learning_rate": 3.884057971014493e-05, | |
| "loss": 0.4803, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2790442705154419, | |
| "step": 135, | |
| "valid_targets_mean": 2300.5, | |
| "valid_targets_min": 997 | |
| }, | |
| { | |
| "epoch": 0.7106598984771574, | |
| "grad_norm": 0.7921279308943474, | |
| "learning_rate": 3.999993591506466e-05, | |
| "loss": 0.4917, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24704962968826294, | |
| "step": 140, | |
| "valid_targets_mean": 2552.6, | |
| "valid_targets_min": 1212 | |
| }, | |
| { | |
| "epoch": 0.7360406091370558, | |
| "grad_norm": 0.8137994242183286, | |
| "learning_rate": 3.99976929854497e-05, | |
| "loss": 0.4831, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26479896903038025, | |
| "step": 145, | |
| "valid_targets_mean": 2318.9, | |
| "valid_targets_min": 1106 | |
| }, | |
| { | |
| "epoch": 0.7614213197969543, | |
| "grad_norm": 0.9619494319652759, | |
| "learning_rate": 3.999224621974382e-05, | |
| "loss": 0.4804, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21784883737564087, | |
| "step": 150, | |
| "valid_targets_mean": 1636.8, | |
| "valid_targets_min": 293 | |
| }, | |
| { | |
| "epoch": 0.7868020304568528, | |
| "grad_norm": 0.8133912555983293, | |
| "learning_rate": 3.9983596490574876e-05, | |
| "loss": 0.4899, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25159645080566406, | |
| "step": 155, | |
| "valid_targets_mean": 2658.9, | |
| "valid_targets_min": 1305 | |
| }, | |
| { | |
| "epoch": 0.8121827411167513, | |
| "grad_norm": 0.7469299189370396, | |
| "learning_rate": 3.9971745183718484e-05, | |
| "loss": 0.4689, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24954304099082947, | |
| "step": 160, | |
| "valid_targets_mean": 2515.0, | |
| "valid_targets_min": 1564 | |
| }, | |
| { | |
| "epoch": 0.8375634517766497, | |
| "grad_norm": 0.8486207699746516, | |
| "learning_rate": 3.995669419787586e-05, | |
| "loss": 0.4777, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22933290898799896, | |
| "step": 165, | |
| "valid_targets_mean": 2142.1, | |
| "valid_targets_min": 1260 | |
| }, | |
| { | |
| "epoch": 0.8629441624365483, | |
| "grad_norm": 0.7790368486491089, | |
| "learning_rate": 3.9938445944369745e-05, | |
| "loss": 0.447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31065988540649414, | |
| "step": 170, | |
| "valid_targets_mean": 2739.6, | |
| "valid_targets_min": 1702 | |
| }, | |
| { | |
| "epoch": 0.8883248730964467, | |
| "grad_norm": 0.877554911194639, | |
| "learning_rate": 3.9917003346758035e-05, | |
| "loss": 0.4579, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17315532267093658, | |
| "step": 175, | |
| "valid_targets_mean": 1833.6, | |
| "valid_targets_min": 717 | |
| }, | |
| { | |
| "epoch": 0.9137055837563451, | |
| "grad_norm": 0.8295490474717458, | |
| "learning_rate": 3.989236984036541e-05, | |
| "loss": 0.452, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2310536652803421, | |
| "step": 180, | |
| "valid_targets_mean": 2141.0, | |
| "valid_targets_min": 1027 | |
| }, | |
| { | |
| "epoch": 0.9390862944162437, | |
| "grad_norm": 0.841351322041739, | |
| "learning_rate": 3.986454937173292e-05, | |
| "loss": 0.4723, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2736300230026245, | |
| "step": 185, | |
| "valid_targets_mean": 2538.2, | |
| "valid_targets_min": 664 | |
| }, | |
| { | |
| "epoch": 0.9644670050761421, | |
| "grad_norm": 0.8292630926233844, | |
| "learning_rate": 3.98335463979858e-05, | |
| "loss": 0.4487, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1952267736196518, | |
| "step": 190, | |
| "valid_targets_mean": 2048.0, | |
| "valid_targets_min": 925 | |
| }, | |
| { | |
| "epoch": 0.9898477157360406, | |
| "grad_norm": 0.7874022860460002, | |
| "learning_rate": 3.9799365886119304e-05, | |
| "loss": 0.4553, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2375510036945343, | |
| "step": 195, | |
| "valid_targets_mean": 2771.0, | |
| "valid_targets_min": 1014 | |
| }, | |
| { | |
| "epoch": 1.015228426395939, | |
| "grad_norm": 0.8281005262337682, | |
| "learning_rate": 3.976201331220296e-05, | |
| "loss": 0.4614, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24540838599205017, | |
| "step": 200, | |
| "valid_targets_mean": 2320.5, | |
| "valid_targets_min": 743 | |
| }, | |
| { | |
| "epoch": 1.0406091370558375, | |
| "grad_norm": 0.863938516440108, | |
| "learning_rate": 3.9721494660503295e-05, | |
| "loss": 0.423, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2663731575012207, | |
| "step": 205, | |
| "valid_targets_mean": 2648.2, | |
| "valid_targets_min": 1396 | |
| }, | |
| { | |
| "epoch": 1.0659898477157361, | |
| "grad_norm": 0.737462168238837, | |
| "learning_rate": 3.9677816422525024e-05, | |
| "loss": 0.4655, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22477290034294128, | |
| "step": 210, | |
| "valid_targets_mean": 3102.6, | |
| "valid_targets_min": 1448 | |
| }, | |
| { | |
| "epoch": 1.0913705583756346, | |
| "grad_norm": 0.7157709835306627, | |
| "learning_rate": 3.963098559597112e-05, | |
| "loss": 0.401, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19973540306091309, | |
| "step": 215, | |
| "valid_targets_mean": 3456.8, | |
| "valid_targets_min": 1023 | |
| }, | |
| { | |
| "epoch": 1.116751269035533, | |
| "grad_norm": 0.6962890917176653, | |
| "learning_rate": 3.9581009683621634e-05, | |
| "loss": 0.4255, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24212422966957092, | |
| "step": 220, | |
| "valid_targets_mean": 3592.0, | |
| "valid_targets_min": 547 | |
| }, | |
| { | |
| "epoch": 1.1421319796954315, | |
| "grad_norm": 0.8386868847925464, | |
| "learning_rate": 3.952789669213173e-05, | |
| "loss": 0.461, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26728707551956177, | |
| "step": 225, | |
| "valid_targets_mean": 2497.1, | |
| "valid_targets_min": 1037 | |
| }, | |
| { | |
| "epoch": 1.16751269035533, | |
| "grad_norm": 0.8271050112319727, | |
| "learning_rate": 3.9471655130748894e-05, | |
| "loss": 0.4043, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27514880895614624, | |
| "step": 230, | |
| "valid_targets_mean": 3284.0, | |
| "valid_targets_min": 1193 | |
| }, | |
| { | |
| "epoch": 1.1928934010152283, | |
| "grad_norm": 0.6759784826022383, | |
| "learning_rate": 3.9412294009949716e-05, | |
| "loss": 0.4098, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18727204203605652, | |
| "step": 235, | |
| "valid_targets_mean": 3005.8, | |
| "valid_targets_min": 1041 | |
| }, | |
| { | |
| "epoch": 1.218274111675127, | |
| "grad_norm": 0.817175374388556, | |
| "learning_rate": 3.9349822839996266e-05, | |
| "loss": 0.4323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20295163989067078, | |
| "step": 240, | |
| "valid_targets_mean": 2283.8, | |
| "valid_targets_min": 943 | |
| }, | |
| { | |
| "epoch": 1.2436548223350254, | |
| "grad_norm": 0.8097233905648007, | |
| "learning_rate": 3.928425162941248e-05, | |
| "loss": 0.4481, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23431196808815002, | |
| "step": 245, | |
| "valid_targets_mean": 2414.5, | |
| "valid_targets_min": 1209 | |
| }, | |
| { | |
| "epoch": 1.2690355329949239, | |
| "grad_norm": 0.7813237824157772, | |
| "learning_rate": 3.9215590883380687e-05, | |
| "loss": 0.4234, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19021698832511902, | |
| "step": 250, | |
| "valid_targets_mean": 2019.9, | |
| "valid_targets_min": 911 | |
| }, | |
| { | |
| "epoch": 1.2944162436548223, | |
| "grad_norm": 0.8647188948351984, | |
| "learning_rate": 3.914385160205858e-05, | |
| "loss": 0.4021, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2341812700033188, | |
| "step": 255, | |
| "valid_targets_mean": 2704.2, | |
| "valid_targets_min": 1352 | |
| }, | |
| { | |
| "epoch": 1.3197969543147208, | |
| "grad_norm": 0.6904746968392748, | |
| "learning_rate": 3.9069045278816844e-05, | |
| "loss": 0.4236, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18902748823165894, | |
| "step": 260, | |
| "valid_targets_mean": 2808.2, | |
| "valid_targets_min": 1029 | |
| }, | |
| { | |
| "epoch": 1.3451776649746192, | |
| "grad_norm": 0.8806548756271572, | |
| "learning_rate": 3.899118389839785e-05, | |
| "loss": 0.4181, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23270106315612793, | |
| "step": 265, | |
| "valid_targets_mean": 2471.4, | |
| "valid_targets_min": 1637 | |
| }, | |
| { | |
| "epoch": 1.3705583756345177, | |
| "grad_norm": 0.8986724175664121, | |
| "learning_rate": 3.8910279934995545e-05, | |
| "loss": 0.4117, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2592482566833496, | |
| "step": 270, | |
| "valid_targets_mean": 2158.0, | |
| "valid_targets_min": 1359 | |
| }, | |
| { | |
| "epoch": 1.3959390862944163, | |
| "grad_norm": 0.8693834754860535, | |
| "learning_rate": 3.8826346350256943e-05, | |
| "loss": 0.398, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15725184977054596, | |
| "step": 275, | |
| "valid_targets_mean": 1973.4, | |
| "valid_targets_min": 260 | |
| }, | |
| { | |
| "epoch": 1.4213197969543148, | |
| "grad_norm": 0.9695877466874488, | |
| "learning_rate": 3.873939659120558e-05, | |
| "loss": 0.4151, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2322547733783722, | |
| "step": 280, | |
| "valid_targets_mean": 2277.5, | |
| "valid_targets_min": 881 | |
| }, | |
| { | |
| "epoch": 1.4467005076142132, | |
| "grad_norm": 1.1408976363494896, | |
| "learning_rate": 3.864944458808712e-05, | |
| "loss": 0.4056, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20991933345794678, | |
| "step": 285, | |
| "valid_targets_mean": 2109.6, | |
| "valid_targets_min": 338 | |
| }, | |
| { | |
| "epoch": 1.4720812182741116, | |
| "grad_norm": 0.7930406989370231, | |
| "learning_rate": 3.855650475213761e-05, | |
| "loss": 0.4243, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19697341322898865, | |
| "step": 290, | |
| "valid_targets_mean": 2129.9, | |
| "valid_targets_min": 954 | |
| }, | |
| { | |
| "epoch": 1.49746192893401, | |
| "grad_norm": 0.7719998634448374, | |
| "learning_rate": 3.846059197327466e-05, | |
| "loss": 0.4011, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19249945878982544, | |
| "step": 295, | |
| "valid_targets_mean": 2426.6, | |
| "valid_targets_min": 1026 | |
| }, | |
| { | |
| "epoch": 1.5228426395939088, | |
| "grad_norm": 1.038359126244796, | |
| "learning_rate": 3.836172161771189e-05, | |
| "loss": 0.4049, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19448643922805786, | |
| "step": 300, | |
| "valid_targets_mean": 2199.6, | |
| "valid_targets_min": 1113 | |
| }, | |
| { | |
| "epoch": 1.548223350253807, | |
| "grad_norm": 0.8411192815275068, | |
| "learning_rate": 3.8259909525497134e-05, | |
| "loss": 0.3955, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23402699828147888, | |
| "step": 305, | |
| "valid_targets_mean": 2455.5, | |
| "valid_targets_min": 1271 | |
| }, | |
| { | |
| "epoch": 1.5736040609137056, | |
| "grad_norm": 0.6971216673275156, | |
| "learning_rate": 3.81551720079747e-05, | |
| "loss": 0.4128, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20138466358184814, | |
| "step": 310, | |
| "valid_targets_mean": 2834.8, | |
| "valid_targets_min": 664 | |
| }, | |
| { | |
| "epoch": 1.598984771573604, | |
| "grad_norm": 0.7718252836789953, | |
| "learning_rate": 3.8047525845172104e-05, | |
| "loss": 0.3937, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18358199298381805, | |
| "step": 315, | |
| "valid_targets_mean": 2064.4, | |
| "valid_targets_min": 265 | |
| }, | |
| { | |
| "epoch": 1.6243654822335025, | |
| "grad_norm": 0.8717500738963505, | |
| "learning_rate": 3.7936988283111764e-05, | |
| "loss": 0.4031, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17218948900699615, | |
| "step": 320, | |
| "valid_targets_mean": 1914.4, | |
| "valid_targets_min": 1034 | |
| }, | |
| { | |
| "epoch": 1.649746192893401, | |
| "grad_norm": 0.9254432850747054, | |
| "learning_rate": 3.7823577031048e-05, | |
| "loss": 0.4003, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24102842807769775, | |
| "step": 325, | |
| "valid_targets_mean": 1880.4, | |
| "valid_targets_min": 1207 | |
| }, | |
| { | |
| "epoch": 1.6751269035532994, | |
| "grad_norm": 0.7161238412026414, | |
| "learning_rate": 3.77073102586298e-05, | |
| "loss": 0.4184, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19188612699508667, | |
| "step": 330, | |
| "valid_targets_mean": 2821.5, | |
| "valid_targets_min": 1192 | |
| }, | |
| { | |
| "epoch": 1.700507614213198, | |
| "grad_norm": 0.7671504397949204, | |
| "learning_rate": 3.758820659298991e-05, | |
| "loss": 0.4147, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19509898126125336, | |
| "step": 335, | |
| "valid_targets_mean": 2548.6, | |
| "valid_targets_min": 395 | |
| }, | |
| { | |
| "epoch": 1.7258883248730963, | |
| "grad_norm": 0.6536269958225315, | |
| "learning_rate": 3.746628511576054e-05, | |
| "loss": 0.4208, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21844618022441864, | |
| "step": 340, | |
| "valid_targets_mean": 3179.2, | |
| "valid_targets_min": 787 | |
| }, | |
| { | |
| "epoch": 1.751269035532995, | |
| "grad_norm": 0.7979435685524106, | |
| "learning_rate": 3.734156536001629e-05, | |
| "loss": 0.4288, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15352977812290192, | |
| "step": 345, | |
| "valid_targets_mean": 1618.5, | |
| "valid_targets_min": 663 | |
| }, | |
| { | |
| "epoch": 1.7766497461928934, | |
| "grad_norm": 0.7326788452634015, | |
| "learning_rate": 3.721406730714476e-05, | |
| "loss": 0.4035, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16227096319198608, | |
| "step": 350, | |
| "valid_targets_mean": 2581.4, | |
| "valid_targets_min": 1239 | |
| }, | |
| { | |
| "epoch": 1.8020304568527918, | |
| "grad_norm": 0.8737447795021912, | |
| "learning_rate": 3.7083811383645334e-05, | |
| "loss": 0.4057, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2652243673801422, | |
| "step": 355, | |
| "valid_targets_mean": 2171.6, | |
| "valid_targets_min": 840 | |
| }, | |
| { | |
| "epoch": 1.8274111675126905, | |
| "grad_norm": 0.8207080923695146, | |
| "learning_rate": 3.695081845785663e-05, | |
| "loss": 0.4183, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20286084711551666, | |
| "step": 360, | |
| "valid_targets_mean": 1936.5, | |
| "valid_targets_min": 873 | |
| }, | |
| { | |
| "epoch": 1.8527918781725887, | |
| "grad_norm": 0.7029601914650748, | |
| "learning_rate": 3.6815109836613165e-05, | |
| "loss": 0.3992, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2135118693113327, | |
| "step": 365, | |
| "valid_targets_mean": 2853.2, | |
| "valid_targets_min": 1190 | |
| }, | |
| { | |
| "epoch": 1.8781725888324874, | |
| "grad_norm": 0.7056348465600532, | |
| "learning_rate": 3.6676707261831836e-05, | |
| "loss": 0.3993, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21114632487297058, | |
| "step": 370, | |
| "valid_targets_mean": 3049.5, | |
| "valid_targets_min": 1267 | |
| }, | |
| { | |
| "epoch": 1.9035532994923858, | |
| "grad_norm": 0.7655665068417371, | |
| "learning_rate": 3.6535632907028566e-05, | |
| "loss": 0.4198, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21775612235069275, | |
| "step": 375, | |
| "valid_targets_mean": 2581.8, | |
| "valid_targets_min": 554 | |
| }, | |
| { | |
| "epoch": 1.9289340101522843, | |
| "grad_norm": 0.9215750903172012, | |
| "learning_rate": 3.6391909373765944e-05, | |
| "loss": 0.4097, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2149774134159088, | |
| "step": 380, | |
| "valid_targets_mean": 1916.9, | |
| "valid_targets_min": 1348 | |
| }, | |
| { | |
| "epoch": 1.9543147208121827, | |
| "grad_norm": 0.7063465279362279, | |
| "learning_rate": 3.6245559688032176e-05, | |
| "loss": 0.3997, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16683170199394226, | |
| "step": 385, | |
| "valid_targets_mean": 2854.8, | |
| "valid_targets_min": 1031 | |
| }, | |
| { | |
| "epoch": 1.9796954314720812, | |
| "grad_norm": 0.8653155796886824, | |
| "learning_rate": 3.609660729655212e-05, | |
| "loss": 0.4187, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18881481885910034, | |
| "step": 390, | |
| "valid_targets_mean": 1948.9, | |
| "valid_targets_min": 1062 | |
| }, | |
| { | |
| "epoch": 2.00507614213198, | |
| "grad_norm": 0.8170991461484087, | |
| "learning_rate": 3.5945076063030835e-05, | |
| "loss": 0.3959, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1733715534210205, | |
| "step": 395, | |
| "valid_targets_mean": 1751.5, | |
| "valid_targets_min": 921 | |
| }, | |
| { | |
| "epoch": 2.030456852791878, | |
| "grad_norm": 0.7475776291611619, | |
| "learning_rate": 3.579099026433044e-05, | |
| "loss": 0.3661, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19022664427757263, | |
| "step": 400, | |
| "valid_targets_mean": 2863.1, | |
| "valid_targets_min": 1108 | |
| }, | |
| { | |
| "epoch": 2.0558375634517767, | |
| "grad_norm": 0.6491922016359316, | |
| "learning_rate": 3.563437458658064e-05, | |
| "loss": 0.364, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20632588863372803, | |
| "step": 405, | |
| "valid_targets_mean": 3949.6, | |
| "valid_targets_min": 519 | |
| }, | |
| { | |
| "epoch": 2.081218274111675, | |
| "grad_norm": 1.0197660638706911, | |
| "learning_rate": 3.547525412122378e-05, | |
| "loss": 0.3568, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21689572930335999, | |
| "step": 410, | |
| "valid_targets_mean": 2777.9, | |
| "valid_targets_min": 293 | |
| }, | |
| { | |
| "epoch": 2.1065989847715736, | |
| "grad_norm": 0.7134302398281508, | |
| "learning_rate": 3.531365436099497e-05, | |
| "loss": 0.361, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18497325479984283, | |
| "step": 415, | |
| "valid_targets_mean": 3421.8, | |
| "valid_targets_min": 960 | |
| }, | |
| { | |
| "epoch": 2.1319796954314723, | |
| "grad_norm": 0.6857248384939708, | |
| "learning_rate": 3.5149601195837815e-05, | |
| "loss": 0.3541, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13612838089466095, | |
| "step": 420, | |
| "valid_targets_mean": 2547.1, | |
| "valid_targets_min": 1322 | |
| }, | |
| { | |
| "epoch": 2.1573604060913705, | |
| "grad_norm": 0.8453797312754958, | |
| "learning_rate": 3.498312090875667e-05, | |
| "loss": 0.3589, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1847008466720581, | |
| "step": 425, | |
| "valid_targets_mean": 2348.5, | |
| "valid_targets_min": 314 | |
| }, | |
| { | |
| "epoch": 2.182741116751269, | |
| "grad_norm": 0.7391501108956771, | |
| "learning_rate": 3.481424017160574e-05, | |
| "loss": 0.3631, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14230400323867798, | |
| "step": 430, | |
| "valid_targets_mean": 2258.4, | |
| "valid_targets_min": 1001 | |
| }, | |
| { | |
| "epoch": 2.2081218274111674, | |
| "grad_norm": 0.7770616989599075, | |
| "learning_rate": 3.464298604081607e-05, | |
| "loss": 0.3753, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1601681113243103, | |
| "step": 435, | |
| "valid_targets_mean": 2456.1, | |
| "valid_targets_min": 783 | |
| }, | |
| { | |
| "epoch": 2.233502538071066, | |
| "grad_norm": 0.7765882607320425, | |
| "learning_rate": 3.4469385953060715e-05, | |
| "loss": 0.355, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17671945691108704, | |
| "step": 440, | |
| "valid_targets_mean": 3366.9, | |
| "valid_targets_min": 299 | |
| }, | |
| { | |
| "epoch": 2.2588832487309647, | |
| "grad_norm": 0.8902139321477067, | |
| "learning_rate": 3.429346772085923e-05, | |
| "loss": 0.3607, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16170795261859894, | |
| "step": 445, | |
| "valid_targets_mean": 1621.0, | |
| "valid_targets_min": 974 | |
| }, | |
| { | |
| "epoch": 2.284263959390863, | |
| "grad_norm": 0.8702764129458472, | |
| "learning_rate": 3.4115259528121685e-05, | |
| "loss": 0.3566, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15190370380878448, | |
| "step": 450, | |
| "valid_targets_mean": 1681.5, | |
| "valid_targets_min": 386 | |
| }, | |
| { | |
| "epoch": 2.3096446700507616, | |
| "grad_norm": 0.7232965659119709, | |
| "learning_rate": 3.3934789925633426e-05, | |
| "loss": 0.3471, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15331853926181793, | |
| "step": 455, | |
| "valid_targets_mean": 2754.1, | |
| "valid_targets_min": 1361 | |
| }, | |
| { | |
| "epoch": 2.33502538071066, | |
| "grad_norm": 0.9155170217649454, | |
| "learning_rate": 3.37520878264809e-05, | |
| "loss": 0.3891, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20819316804409027, | |
| "step": 460, | |
| "valid_targets_mean": 2690.5, | |
| "valid_targets_min": 1071 | |
| }, | |
| { | |
| "epoch": 2.3604060913705585, | |
| "grad_norm": 0.8781514672252451, | |
| "learning_rate": 3.356718250141945e-05, | |
| "loss": 0.3724, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.134077787399292, | |
| "step": 465, | |
| "valid_targets_mean": 2086.6, | |
| "valid_targets_min": 1178 | |
| }, | |
| { | |
| "epoch": 2.3857868020304567, | |
| "grad_norm": 0.7830372566270385, | |
| "learning_rate": 3.33801035741839e-05, | |
| "loss": 0.3503, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18803343176841736, | |
| "step": 470, | |
| "valid_targets_mean": 2633.1, | |
| "valid_targets_min": 1325 | |
| }, | |
| { | |
| "epoch": 2.4111675126903553, | |
| "grad_norm": 0.8576354377653381, | |
| "learning_rate": 3.3190881016742476e-05, | |
| "loss": 0.3606, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20815789699554443, | |
| "step": 475, | |
| "valid_targets_mean": 2486.2, | |
| "valid_targets_min": 1207 | |
| }, | |
| { | |
| "epoch": 2.436548223350254, | |
| "grad_norm": 1.5435968887704627, | |
| "learning_rate": 3.2999545144495037e-05, | |
| "loss": 0.3529, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.161598339676857, | |
| "step": 480, | |
| "valid_targets_mean": 1942.1, | |
| "valid_targets_min": 1065 | |
| }, | |
| { | |
| "epoch": 2.4619289340101522, | |
| "grad_norm": 0.811888246473812, | |
| "learning_rate": 3.280612661141615e-05, | |
| "loss": 0.3768, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19060321152210236, | |
| "step": 485, | |
| "valid_targets_mean": 2876.8, | |
| "valid_targets_min": 730 | |
| }, | |
| { | |
| "epoch": 2.487309644670051, | |
| "grad_norm": 0.8786943381528795, | |
| "learning_rate": 3.2610656405144155e-05, | |
| "loss": 0.3798, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15384045243263245, | |
| "step": 490, | |
| "valid_targets_mean": 2182.9, | |
| "valid_targets_min": 991 | |
| }, | |
| { | |
| "epoch": 2.512690355329949, | |
| "grad_norm": 0.820490729426486, | |
| "learning_rate": 3.241316584201647e-05, | |
| "loss": 0.3505, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17183181643486023, | |
| "step": 495, | |
| "valid_targets_mean": 2279.4, | |
| "valid_targets_min": 1108 | |
| }, | |
| { | |
| "epoch": 2.5380710659898478, | |
| "grad_norm": 0.9204652322698659, | |
| "learning_rate": 3.2213686562052474e-05, | |
| "loss": 0.3732, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1887807548046112, | |
| "step": 500, | |
| "valid_targets_mean": 2191.2, | |
| "valid_targets_min": 1041 | |
| }, | |
| { | |
| "epoch": 2.563451776649746, | |
| "grad_norm": 0.9465490176668487, | |
| "learning_rate": 3.201225052388446e-05, | |
| "loss": 0.334, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22429196536540985, | |
| "step": 505, | |
| "valid_targets_mean": 3044.9, | |
| "valid_targets_min": 1027 | |
| }, | |
| { | |
| "epoch": 2.5888324873096447, | |
| "grad_norm": 0.9526205216743764, | |
| "learning_rate": 3.1808889999637496e-05, | |
| "loss": 0.3468, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20097972452640533, | |
| "step": 510, | |
| "valid_targets_mean": 1904.6, | |
| "valid_targets_min": 520 | |
| }, | |
| { | |
| "epoch": 2.6142131979695433, | |
| "grad_norm": 0.729520567033627, | |
| "learning_rate": 3.16036375697591e-05, | |
| "loss": 0.3585, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16022509336471558, | |
| "step": 515, | |
| "valid_targets_mean": 2627.0, | |
| "valid_targets_min": 989 | |
| }, | |
| { | |
| "epoch": 2.6395939086294415, | |
| "grad_norm": 1.3536946216616152, | |
| "learning_rate": 3.1396526117799557e-05, | |
| "loss": 0.3765, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18466079235076904, | |
| "step": 520, | |
| "valid_targets_mean": 2746.9, | |
| "valid_targets_min": 1103 | |
| }, | |
| { | |
| "epoch": 2.66497461928934, | |
| "grad_norm": 0.7475141562550974, | |
| "learning_rate": 3.1187588825143596e-05, | |
| "loss": 0.3718, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15916241705417633, | |
| "step": 525, | |
| "valid_targets_mean": 2781.8, | |
| "valid_targets_min": 1013 | |
| }, | |
| { | |
| "epoch": 2.6903553299492384, | |
| "grad_norm": 0.8623691385097324, | |
| "learning_rate": 3.097685916569439e-05, | |
| "loss": 0.3475, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15874391794204712, | |
| "step": 530, | |
| "valid_targets_mean": 1746.9, | |
| "valid_targets_min": 960 | |
| }, | |
| { | |
| "epoch": 2.715736040609137, | |
| "grad_norm": 0.8434226552181769, | |
| "learning_rate": 3.076437090051073e-05, | |
| "loss": 0.3541, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1512419879436493, | |
| "step": 535, | |
| "valid_targets_mean": 1490.5, | |
| "valid_targets_min": 615 | |
| }, | |
| { | |
| "epoch": 2.7411167512690353, | |
| "grad_norm": 0.8209590984528506, | |
| "learning_rate": 3.0550158072398125e-05, | |
| "loss": 0.3551, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17597289383411407, | |
| "step": 540, | |
| "valid_targets_mean": 2720.9, | |
| "valid_targets_min": 1163 | |
| }, | |
| { | |
| "epoch": 2.766497461928934, | |
| "grad_norm": 0.7574051365860957, | |
| "learning_rate": 3.0334255000454795e-05, | |
| "loss": 0.3348, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20840352773666382, | |
| "step": 545, | |
| "valid_targets_mean": 3306.6, | |
| "valid_targets_min": 1083 | |
| }, | |
| { | |
| "epoch": 2.7918781725888326, | |
| "grad_norm": 0.8006767917594756, | |
| "learning_rate": 3.011669627457341e-05, | |
| "loss": 0.3625, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18542571365833282, | |
| "step": 550, | |
| "valid_targets_mean": 2468.4, | |
| "valid_targets_min": 1178 | |
| }, | |
| { | |
| "epoch": 2.817258883248731, | |
| "grad_norm": 0.9470419157549197, | |
| "learning_rate": 2.989751674989943e-05, | |
| "loss": 0.3654, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14285054802894592, | |
| "step": 555, | |
| "valid_targets_mean": 1392.9, | |
| "valid_targets_min": 891 | |
| }, | |
| { | |
| "epoch": 2.8426395939086295, | |
| "grad_norm": 0.8645437304433551, | |
| "learning_rate": 2.967675154124696e-05, | |
| "loss": 0.3818, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19589808583259583, | |
| "step": 560, | |
| "valid_targets_mean": 2538.4, | |
| "valid_targets_min": 866 | |
| }, | |
| { | |
| "epoch": 2.868020304568528, | |
| "grad_norm": 0.7250404475202903, | |
| "learning_rate": 2.945443601747297e-05, | |
| "loss": 0.3504, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14852246642112732, | |
| "step": 565, | |
| "valid_targets_mean": 2435.1, | |
| "valid_targets_min": 654 | |
| }, | |
| { | |
| "epoch": 2.8934010152284264, | |
| "grad_norm": 0.7824384771297894, | |
| "learning_rate": 2.923060579581087e-05, | |
| "loss": 0.3567, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15607652068138123, | |
| "step": 570, | |
| "valid_targets_mean": 2221.4, | |
| "valid_targets_min": 925 | |
| }, | |
| { | |
| "epoch": 2.9187817258883246, | |
| "grad_norm": 0.8509884081444614, | |
| "learning_rate": 2.9005296736164246e-05, | |
| "loss": 0.3791, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1786184310913086, | |
| "step": 575, | |
| "valid_targets_mean": 1949.9, | |
| "valid_targets_min": 417 | |
| }, | |
| { | |
| "epoch": 2.9441624365482233, | |
| "grad_norm": 0.8941701590016254, | |
| "learning_rate": 2.8778544935361742e-05, | |
| "loss": 0.3747, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17315533757209778, | |
| "step": 580, | |
| "valid_targets_mean": 2073.0, | |
| "valid_targets_min": 1230 | |
| }, | |
| { | |
| "epoch": 2.969543147208122, | |
| "grad_norm": 0.7031934956461914, | |
| "learning_rate": 2.855038672137396e-05, | |
| "loss": 0.3458, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1579364687204361, | |
| "step": 585, | |
| "valid_targets_mean": 3130.5, | |
| "valid_targets_min": 1172 | |
| }, | |
| { | |
| "epoch": 2.99492385786802, | |
| "grad_norm": 0.783365609912982, | |
| "learning_rate": 2.8320858647493374e-05, | |
| "loss": 0.3659, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15455904603004456, | |
| "step": 590, | |
| "valid_targets_mean": 3087.0, | |
| "valid_targets_min": 1488 | |
| }, | |
| { | |
| "epoch": 3.020304568527919, | |
| "grad_norm": 0.6900709567681671, | |
| "learning_rate": 2.8089997486478102e-05, | |
| "loss": 0.3371, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15949803590774536, | |
| "step": 595, | |
| "valid_targets_mean": 3190.9, | |
| "valid_targets_min": 891 | |
| }, | |
| { | |
| "epoch": 3.045685279187817, | |
| "grad_norm": 1.0051215762468377, | |
| "learning_rate": 2.785784022466053e-05, | |
| "loss": 0.3112, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17178702354431152, | |
| "step": 600, | |
| "valid_targets_mean": 2351.5, | |
| "valid_targets_min": 1187 | |
| }, | |
| { | |
| "epoch": 3.0710659898477157, | |
| "grad_norm": 0.923519604987137, | |
| "learning_rate": 2.7624424056021707e-05, | |
| "loss": 0.3069, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15625596046447754, | |
| "step": 605, | |
| "valid_targets_mean": 1801.5, | |
| "valid_targets_min": 1014 | |
| }, | |
| { | |
| "epoch": 3.0964467005076144, | |
| "grad_norm": 0.7776067525143058, | |
| "learning_rate": 2.738978637623252e-05, | |
| "loss": 0.3277, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13861015439033508, | |
| "step": 610, | |
| "valid_targets_mean": 2401.6, | |
| "valid_targets_min": 990 | |
| }, | |
| { | |
| "epoch": 3.1218274111675126, | |
| "grad_norm": 0.816907993425409, | |
| "learning_rate": 2.7153964776662517e-05, | |
| "loss": 0.2997, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16079218685626984, | |
| "step": 615, | |
| "valid_targets_mean": 2913.8, | |
| "valid_targets_min": 1176 | |
| }, | |
| { | |
| "epoch": 3.1472081218274113, | |
| "grad_norm": 0.9941961064238964, | |
| "learning_rate": 2.691699703835733e-05, | |
| "loss": 0.3111, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.138283371925354, | |
| "step": 620, | |
| "valid_targets_mean": 1757.9, | |
| "valid_targets_min": 1172 | |
| }, | |
| { | |
| "epoch": 3.1725888324873095, | |
| "grad_norm": 0.8805394597273256, | |
| "learning_rate": 2.6678921125985845e-05, | |
| "loss": 0.3162, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17877605557441711, | |
| "step": 625, | |
| "valid_targets_mean": 2323.0, | |
| "valid_targets_min": 299 | |
| }, | |
| { | |
| "epoch": 3.197969543147208, | |
| "grad_norm": 0.8327034844764604, | |
| "learning_rate": 2.6439775181757806e-05, | |
| "loss": 0.3114, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1432086080312729, | |
| "step": 630, | |
| "valid_targets_mean": 2427.1, | |
| "valid_targets_min": 832 | |
| }, | |
| { | |
| "epoch": 3.223350253807107, | |
| "grad_norm": 0.8621722670607938, | |
| "learning_rate": 2.6199597519313092e-05, | |
| "loss": 0.3166, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1823379099369049, | |
| "step": 635, | |
| "valid_targets_mean": 2864.4, | |
| "valid_targets_min": 1810 | |
| }, | |
| { | |
| "epoch": 3.248730964467005, | |
| "grad_norm": 0.9101400606749903, | |
| "learning_rate": 2.5958426617583417e-05, | |
| "loss": 0.3395, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16888044774532318, | |
| "step": 640, | |
| "valid_targets_mean": 2621.1, | |
| "valid_targets_min": 1312 | |
| }, | |
| { | |
| "epoch": 3.2741116751269037, | |
| "grad_norm": 0.8038426630515539, | |
| "learning_rate": 2.5716301114627663e-05, | |
| "loss": 0.3072, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1380758136510849, | |
| "step": 645, | |
| "valid_targets_mean": 2481.5, | |
| "valid_targets_min": 1006 | |
| }, | |
| { | |
| "epoch": 3.299492385786802, | |
| "grad_norm": 0.903322089787113, | |
| "learning_rate": 2.5473259801441663e-05, | |
| "loss": 0.3132, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15146201848983765, | |
| "step": 650, | |
| "valid_targets_mean": 2501.9, | |
| "valid_targets_min": 395 | |
| }, | |
| { | |
| "epoch": 3.3248730964467006, | |
| "grad_norm": 0.8736692357160077, | |
| "learning_rate": 2.5229341615743423e-05, | |
| "loss": 0.3155, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2004031538963318, | |
| "step": 655, | |
| "valid_targets_mean": 2913.1, | |
| "valid_targets_min": 1399 | |
| }, | |
| { | |
| "epoch": 3.350253807106599, | |
| "grad_norm": 0.9728468176146295, | |
| "learning_rate": 2.4984585635734995e-05, | |
| "loss": 0.325, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15162807703018188, | |
| "step": 660, | |
| "valid_targets_mean": 1768.1, | |
| "valid_targets_min": 1212 | |
| }, | |
| { | |
| "epoch": 3.3756345177664975, | |
| "grad_norm": 0.8778493869779497, | |
| "learning_rate": 2.4739031073841652e-05, | |
| "loss": 0.3126, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15126122534275055, | |
| "step": 665, | |
| "valid_targets_mean": 2423.2, | |
| "valid_targets_min": 1162 | |
| }, | |
| { | |
| "epoch": 3.401015228426396, | |
| "grad_norm": 0.7552014337560956, | |
| "learning_rate": 2.4492717270429736e-05, | |
| "loss": 0.3149, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12232430279254913, | |
| "step": 670, | |
| "valid_targets_mean": 2353.0, | |
| "valid_targets_min": 717 | |
| }, | |
| { | |
| "epoch": 3.4263959390862944, | |
| "grad_norm": 0.8762298758120327, | |
| "learning_rate": 2.424568368750385e-05, | |
| "loss": 0.3321, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16355693340301514, | |
| "step": 675, | |
| "valid_targets_mean": 2302.6, | |
| "valid_targets_min": 1026 | |
| }, | |
| { | |
| "epoch": 3.451776649746193, | |
| "grad_norm": 0.8036994015284806, | |
| "learning_rate": 2.3997969902384722e-05, | |
| "loss": 0.3168, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12401950359344482, | |
| "step": 680, | |
| "valid_targets_mean": 2053.5, | |
| "valid_targets_min": 978 | |
| }, | |
| { | |
| "epoch": 3.4771573604060912, | |
| "grad_norm": 0.7832269547659961, | |
| "learning_rate": 2.3749615601368434e-05, | |
| "loss": 0.309, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1947115808725357, | |
| "step": 685, | |
| "valid_targets_mean": 3647.2, | |
| "valid_targets_min": 2220 | |
| }, | |
| { | |
| "epoch": 3.50253807106599, | |
| "grad_norm": 0.9751722132017385, | |
| "learning_rate": 2.3500660573368305e-05, | |
| "loss": 0.3173, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16104276478290558, | |
| "step": 690, | |
| "valid_targets_mean": 2197.1, | |
| "valid_targets_min": 993 | |
| }, | |
| { | |
| "epoch": 3.527918781725888, | |
| "grad_norm": 0.8627897598541073, | |
| "learning_rate": 2.3251144703540313e-05, | |
| "loss": 0.3074, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15009143948554993, | |
| "step": 695, | |
| "valid_targets_mean": 1942.6, | |
| "valid_targets_min": 1051 | |
| }, | |
| { | |
| "epoch": 3.553299492385787, | |
| "grad_norm": 0.8531625959139169, | |
| "learning_rate": 2.3001107966893054e-05, | |
| "loss": 0.3152, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1586218774318695, | |
| "step": 700, | |
| "valid_targets_mean": 2793.2, | |
| "valid_targets_min": 1260 | |
| }, | |
| { | |
| "epoch": 3.5786802030456855, | |
| "grad_norm": 0.9602749420779956, | |
| "learning_rate": 2.2750590421883348e-05, | |
| "loss": 0.3211, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1476803421974182, | |
| "step": 705, | |
| "valid_targets_mean": 1785.0, | |
| "valid_targets_min": 1266 | |
| }, | |
| { | |
| "epoch": 3.6040609137055837, | |
| "grad_norm": 0.785528907194096, | |
| "learning_rate": 2.2499632203998454e-05, | |
| "loss": 0.292, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17573554813861847, | |
| "step": 710, | |
| "valid_targets_mean": 3039.0, | |
| "valid_targets_min": 1455 | |
| }, | |
| { | |
| "epoch": 3.6294416243654823, | |
| "grad_norm": 0.8225218380412574, | |
| "learning_rate": 2.224827351932596e-05, | |
| "loss": 0.3247, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13142558932304382, | |
| "step": 715, | |
| "valid_targets_mean": 2333.9, | |
| "valid_targets_min": 1222 | |
| }, | |
| { | |
| "epoch": 3.6548223350253806, | |
| "grad_norm": 0.7879515274116892, | |
| "learning_rate": 2.1996554638112362e-05, | |
| "loss": 0.3082, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11903517693281174, | |
| "step": 720, | |
| "valid_targets_mean": 1829.2, | |
| "valid_targets_min": 1143 | |
| }, | |
| { | |
| "epoch": 3.6802030456852792, | |
| "grad_norm": 1.1349204285534138, | |
| "learning_rate": 2.174451588831134e-05, | |
| "loss": 0.3192, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2044743299484253, | |
| "step": 725, | |
| "valid_targets_mean": 1458.9, | |
| "valid_targets_min": 329 | |
| }, | |
| { | |
| "epoch": 3.7055837563451774, | |
| "grad_norm": 0.9017031039608042, | |
| "learning_rate": 2.1492197649122794e-05, | |
| "loss": 0.347, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20912542939186096, | |
| "step": 730, | |
| "valid_targets_mean": 2457.0, | |
| "valid_targets_min": 1013 | |
| }, | |
| { | |
| "epoch": 3.730964467005076, | |
| "grad_norm": 0.8310776673814788, | |
| "learning_rate": 2.1239640344523735e-05, | |
| "loss": 0.3118, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16861245036125183, | |
| "step": 735, | |
| "valid_targets_mean": 2512.9, | |
| "valid_targets_min": 801 | |
| }, | |
| { | |
| "epoch": 3.7563451776649748, | |
| "grad_norm": 0.9006914546413552, | |
| "learning_rate": 2.0986884436791875e-05, | |
| "loss": 0.3273, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11753588914871216, | |
| "step": 740, | |
| "valid_targets_mean": 1797.5, | |
| "valid_targets_min": 292 | |
| }, | |
| { | |
| "epoch": 3.781725888324873, | |
| "grad_norm": 0.7756837667890839, | |
| "learning_rate": 2.073397042002322e-05, | |
| "loss": 0.307, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1464722752571106, | |
| "step": 745, | |
| "valid_targets_mean": 2359.9, | |
| "valid_targets_min": 1051 | |
| }, | |
| { | |
| "epoch": 3.8071065989847717, | |
| "grad_norm": 0.769625181048491, | |
| "learning_rate": 2.0480938813644443e-05, | |
| "loss": 0.3049, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13366997241973877, | |
| "step": 750, | |
| "valid_targets_mean": 2627.0, | |
| "valid_targets_min": 1478 | |
| }, | |
| { | |
| "epoch": 3.8324873096446703, | |
| "grad_norm": 1.0106054647061928, | |
| "learning_rate": 2.022783015592132e-05, | |
| "loss": 0.3286, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17920148372650146, | |
| "step": 755, | |
| "valid_targets_mean": 1909.8, | |
| "valid_targets_min": 1184 | |
| }, | |
| { | |
| "epoch": 3.8578680203045685, | |
| "grad_norm": 0.8497529943702222, | |
| "learning_rate": 1.9974684997463986e-05, | |
| "loss": 0.3126, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17976191639900208, | |
| "step": 760, | |
| "valid_targets_mean": 2787.2, | |
| "valid_targets_min": 1320 | |
| }, | |
| { | |
| "epoch": 3.8832487309644668, | |
| "grad_norm": 0.8061578083088016, | |
| "learning_rate": 1.9721543894730428e-05, | |
| "loss": 0.3261, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16275309026241302, | |
| "step": 765, | |
| "valid_targets_mean": 2191.4, | |
| "valid_targets_min": 947 | |
| }, | |
| { | |
| "epoch": 3.9086294416243654, | |
| "grad_norm": 0.8679040422902907, | |
| "learning_rate": 1.946844740352883e-05, | |
| "loss": 0.3214, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1434512734413147, | |
| "step": 770, | |
| "valid_targets_mean": 1871.5, | |
| "valid_targets_min": 293 | |
| }, | |
| { | |
| "epoch": 3.934010152284264, | |
| "grad_norm": 0.8967859956485915, | |
| "learning_rate": 1.9215436072520167e-05, | |
| "loss": 0.3225, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16888327896595, | |
| "step": 775, | |
| "valid_targets_mean": 2496.9, | |
| "valid_targets_min": 834 | |
| }, | |
| { | |
| "epoch": 3.9593908629441623, | |
| "grad_norm": 0.9420456448588729, | |
| "learning_rate": 1.8962550436721867e-05, | |
| "loss": 0.3207, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1651691496372223, | |
| "step": 780, | |
| "valid_targets_mean": 1959.6, | |
| "valid_targets_min": 1067 | |
| }, | |
| { | |
| "epoch": 3.984771573604061, | |
| "grad_norm": 0.8058439772758408, | |
| "learning_rate": 1.8709831011013678e-05, | |
| "loss": 0.2859, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12959708273410797, | |
| "step": 785, | |
| "valid_targets_mean": 2221.1, | |
| "valid_targets_min": 1226 | |
| }, | |
| { | |
| "epoch": 4.01015228426396, | |
| "grad_norm": 0.7923362785700253, | |
| "learning_rate": 1.8457318283646814e-05, | |
| "loss": 0.3028, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14751429855823517, | |
| "step": 790, | |
| "valid_targets_mean": 3143.8, | |
| "valid_targets_min": 1676 | |
| }, | |
| { | |
| "epoch": 4.035532994923858, | |
| "grad_norm": 0.8817490825267614, | |
| "learning_rate": 1.8205052709757263e-05, | |
| "loss": 0.2866, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12734201550483704, | |
| "step": 795, | |
| "valid_targets_mean": 2243.2, | |
| "valid_targets_min": 1106 | |
| }, | |
| { | |
| "epoch": 4.060913705583756, | |
| "grad_norm": 0.811564426430684, | |
| "learning_rate": 1.79530747048845e-05, | |
| "loss": 0.288, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1372891068458557, | |
| "step": 800, | |
| "valid_targets_mean": 3005.8, | |
| "valid_targets_min": 846 | |
| }, | |
| { | |
| "epoch": 4.086294416243655, | |
| "grad_norm": 1.0172125663810014, | |
| "learning_rate": 1.7701424638496473e-05, | |
| "loss": 0.2697, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17663919925689697, | |
| "step": 805, | |
| "valid_targets_mean": 2413.2, | |
| "valid_targets_min": 879 | |
| }, | |
| { | |
| "epoch": 4.111675126903553, | |
| "grad_norm": 0.9356174719933191, | |
| "learning_rate": 1.7450142827522027e-05, | |
| "loss": 0.2927, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17932286858558655, | |
| "step": 810, | |
| "valid_targets_mean": 2165.0, | |
| "valid_targets_min": 1046 | |
| }, | |
| { | |
| "epoch": 4.137055837563452, | |
| "grad_norm": 0.9261663724619607, | |
| "learning_rate": 1.719926952989169e-05, | |
| "loss": 0.2745, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1279735565185547, | |
| "step": 815, | |
| "valid_targets_mean": 2310.0, | |
| "valid_targets_min": 482 | |
| }, | |
| { | |
| "epoch": 4.16243654822335, | |
| "grad_norm": 0.9696079776656041, | |
| "learning_rate": 1.694884493808795e-05, | |
| "loss": 0.2827, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16480299830436707, | |
| "step": 820, | |
| "valid_targets_mean": 2405.8, | |
| "valid_targets_min": 1108 | |
| }, | |
| { | |
| "epoch": 4.187817258883249, | |
| "grad_norm": 0.9220163515345636, | |
| "learning_rate": 1.6698909172706e-05, | |
| "loss": 0.2767, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1316625326871872, | |
| "step": 825, | |
| "valid_targets_mean": 2018.5, | |
| "valid_targets_min": 1319 | |
| }, | |
| { | |
| "epoch": 4.213197969543147, | |
| "grad_norm": 0.9076143574371879, | |
| "learning_rate": 1.644950227602605e-05, | |
| "loss": 0.2829, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1429370939731598, | |
| "step": 830, | |
| "valid_targets_mean": 2114.5, | |
| "valid_targets_min": 1262 | |
| }, | |
| { | |
| "epoch": 4.238578680203045, | |
| "grad_norm": 0.9803947236168491, | |
| "learning_rate": 1.620066420559805e-05, | |
| "loss": 0.2632, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12085558474063873, | |
| "step": 835, | |
| "valid_targets_mean": 1590.6, | |
| "valid_targets_min": 849 | |
| }, | |
| { | |
| "epoch": 4.2639593908629445, | |
| "grad_norm": 1.0298161787059386, | |
| "learning_rate": 1.5952434827840187e-05, | |
| "loss": 0.2929, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1661471277475357, | |
| "step": 840, | |
| "valid_targets_mean": 2170.0, | |
| "valid_targets_min": 1001 | |
| }, | |
| { | |
| "epoch": 4.289340101522843, | |
| "grad_norm": 0.9569708540260855, | |
| "learning_rate": 1.5704853911651777e-05, | |
| "loss": 0.2714, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1327221393585205, | |
| "step": 845, | |
| "valid_targets_mean": 2563.0, | |
| "valid_targets_min": 1147 | |
| }, | |
| { | |
| "epoch": 4.314720812182741, | |
| "grad_norm": 0.8053576371393363, | |
| "learning_rate": 1.545796112204196e-05, | |
| "loss": 0.2672, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07763060182332993, | |
| "step": 850, | |
| "valid_targets_mean": 1756.2, | |
| "valid_targets_min": 498 | |
| }, | |
| { | |
| "epoch": 4.340101522842639, | |
| "grad_norm": 0.9055675412263313, | |
| "learning_rate": 1.5211796013774893e-05, | |
| "loss": 0.2767, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14815682172775269, | |
| "step": 855, | |
| "valid_targets_mean": 2407.4, | |
| "valid_targets_min": 959 | |
| }, | |
| { | |
| "epoch": 4.365482233502538, | |
| "grad_norm": 0.848903444043607, | |
| "learning_rate": 1.4966398025032706e-05, | |
| "loss": 0.2864, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14398732781410217, | |
| "step": 860, | |
| "valid_targets_mean": 2603.8, | |
| "valid_targets_min": 1046 | |
| }, | |
| { | |
| "epoch": 4.3908629441624365, | |
| "grad_norm": 0.8069661195210427, | |
| "learning_rate": 1.4721806471097104e-05, | |
| "loss": 0.2711, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21310167014598846, | |
| "step": 865, | |
| "valid_targets_mean": 3781.8, | |
| "valid_targets_min": 1261 | |
| }, | |
| { | |
| "epoch": 4.416243654822335, | |
| "grad_norm": 0.8380373426685702, | |
| "learning_rate": 1.4478060538050622e-05, | |
| "loss": 0.3041, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11656267940998077, | |
| "step": 870, | |
| "valid_targets_mean": 2404.8, | |
| "valid_targets_min": 1298 | |
| }, | |
| { | |
| "epoch": 4.441624365482234, | |
| "grad_norm": 0.9172317833122811, | |
| "learning_rate": 1.4235199276498652e-05, | |
| "loss": 0.2762, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14826105535030365, | |
| "step": 875, | |
| "valid_targets_mean": 2460.0, | |
| "valid_targets_min": 1559 | |
| }, | |
| { | |
| "epoch": 4.467005076142132, | |
| "grad_norm": 0.8926977376205748, | |
| "learning_rate": 1.3993261595313094e-05, | |
| "loss": 0.2726, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16165882349014282, | |
| "step": 880, | |
| "valid_targets_mean": 2430.6, | |
| "valid_targets_min": 1124 | |
| }, | |
| { | |
| "epoch": 4.49238578680203, | |
| "grad_norm": 0.9194576389085425, | |
| "learning_rate": 1.3752286255398794e-05, | |
| "loss": 0.2927, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14470508694648743, | |
| "step": 885, | |
| "valid_targets_mean": 2208.0, | |
| "valid_targets_min": 1093 | |
| }, | |
| { | |
| "epoch": 4.517766497461929, | |
| "grad_norm": 1.0465128762566749, | |
| "learning_rate": 1.3512311863483606e-05, | |
| "loss": 0.2886, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1443568915128708, | |
| "step": 890, | |
| "valid_targets_mean": 1943.1, | |
| "valid_targets_min": 1047 | |
| }, | |
| { | |
| "epoch": 4.543147208121828, | |
| "grad_norm": 0.9517867035108439, | |
| "learning_rate": 1.3273376865933236e-05, | |
| "loss": 0.2746, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13925078511238098, | |
| "step": 895, | |
| "valid_targets_mean": 2709.4, | |
| "valid_targets_min": 1132 | |
| }, | |
| { | |
| "epoch": 4.568527918781726, | |
| "grad_norm": 1.0210841427352948, | |
| "learning_rate": 1.303551954259172e-05, | |
| "loss": 0.263, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11771374195814133, | |
| "step": 900, | |
| "valid_targets_mean": 1918.6, | |
| "valid_targets_min": 281 | |
| }, | |
| { | |
| "epoch": 4.593908629441624, | |
| "grad_norm": 1.0680504135482736, | |
| "learning_rate": 1.2798778000648602e-05, | |
| "loss": 0.305, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16731064021587372, | |
| "step": 905, | |
| "valid_targets_mean": 1758.8, | |
| "valid_targets_min": 663 | |
| }, | |
| { | |
| "epoch": 4.619289340101523, | |
| "grad_norm": 0.9639049022822739, | |
| "learning_rate": 1.2563190168533766e-05, | |
| "loss": 0.2869, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16094887256622314, | |
| "step": 910, | |
| "valid_targets_mean": 2596.8, | |
| "valid_targets_min": 1077 | |
| }, | |
| { | |
| "epoch": 4.644670050761421, | |
| "grad_norm": 1.0257145278211706, | |
| "learning_rate": 1.2328793789840926e-05, | |
| "loss": 0.2811, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13257353007793427, | |
| "step": 915, | |
| "valid_targets_mean": 2801.4, | |
| "valid_targets_min": 868 | |
| }, | |
| { | |
| "epoch": 4.67005076142132, | |
| "grad_norm": 0.7146965006699328, | |
| "learning_rate": 1.2095626417280686e-05, | |
| "loss": 0.2464, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10758645832538605, | |
| "step": 920, | |
| "valid_targets_mean": 3030.4, | |
| "valid_targets_min": 779 | |
| }, | |
| { | |
| "epoch": 4.695431472081218, | |
| "grad_norm": 0.922142129356091, | |
| "learning_rate": 1.1863725406664241e-05, | |
| "loss": 0.2708, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12253784388303757, | |
| "step": 925, | |
| "valid_targets_mean": 2085.0, | |
| "valid_targets_min": 1042 | |
| }, | |
| { | |
| "epoch": 4.720812182741117, | |
| "grad_norm": 0.9829158703940855, | |
| "learning_rate": 1.163312791091858e-05, | |
| "loss": 0.276, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11979790776968002, | |
| "step": 930, | |
| "valid_targets_mean": 2673.2, | |
| "valid_targets_min": 1178 | |
| }, | |
| { | |
| "epoch": 4.746192893401015, | |
| "grad_norm": 0.9917842043347582, | |
| "learning_rate": 1.1403870874134192e-05, | |
| "loss": 0.2969, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2870727777481079, | |
| "step": 935, | |
| "valid_targets_mean": 3639.1, | |
| "valid_targets_min": 1065 | |
| }, | |
| { | |
| "epoch": 4.771573604060913, | |
| "grad_norm": 0.7355057489858818, | |
| "learning_rate": 1.1175991025646267e-05, | |
| "loss": 0.2586, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13370351493358612, | |
| "step": 940, | |
| "valid_targets_mean": 3761.0, | |
| "valid_targets_min": 1076 | |
| }, | |
| { | |
| "epoch": 4.7969543147208125, | |
| "grad_norm": 0.9337802455576882, | |
| "learning_rate": 1.0949524874150246e-05, | |
| "loss": 0.2818, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1805444359779358, | |
| "step": 945, | |
| "valid_targets_mean": 2492.8, | |
| "valid_targets_min": 997 | |
| }, | |
| { | |
| "epoch": 4.822335025380711, | |
| "grad_norm": 1.0058650819266446, | |
| "learning_rate": 1.0724508701852807e-05, | |
| "loss": 0.2645, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12135922163724899, | |
| "step": 950, | |
| "valid_targets_mean": 2615.0, | |
| "valid_targets_min": 1013 | |
| }, | |
| { | |
| "epoch": 4.847715736040609, | |
| "grad_norm": 0.887746711444636, | |
| "learning_rate": 1.0500978558659001e-05, | |
| "loss": 0.2732, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11861827224493027, | |
| "step": 955, | |
| "valid_targets_mean": 2106.6, | |
| "valid_targets_min": 1026 | |
| }, | |
| { | |
| "epoch": 4.873096446700508, | |
| "grad_norm": 0.8819049641510628, | |
| "learning_rate": 1.0278970256396764e-05, | |
| "loss": 0.2978, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15796110033988953, | |
| "step": 960, | |
| "valid_targets_mean": 2376.9, | |
| "valid_targets_min": 1021 | |
| }, | |
| { | |
| "epoch": 4.898477157360406, | |
| "grad_norm": 1.0905411737905097, | |
| "learning_rate": 1.0058519363079464e-05, | |
| "loss": 0.29, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16498346626758575, | |
| "step": 965, | |
| "valid_targets_mean": 1871.6, | |
| "valid_targets_min": 1180 | |
| }, | |
| { | |
| "epoch": 4.9238578680203045, | |
| "grad_norm": 0.900093426991199, | |
| "learning_rate": 9.839661197207527e-06, | |
| "loss": 0.2863, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12465168535709381, | |
| "step": 970, | |
| "valid_targets_mean": 2361.0, | |
| "valid_targets_min": 1292 | |
| }, | |
| { | |
| "epoch": 4.949238578680203, | |
| "grad_norm": 1.0139948291819079, | |
| "learning_rate": 9.622430822110063e-06, | |
| "loss": 0.306, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1524374783039093, | |
| "step": 975, | |
| "valid_targets_mean": 1875.8, | |
| "valid_targets_min": 990 | |
| }, | |
| { | |
| "epoch": 4.974619289340102, | |
| "grad_norm": 1.0449824510291859, | |
| "learning_rate": 9.40686304032735e-06, | |
| "loss": 0.2732, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1726701259613037, | |
| "step": 980, | |
| "valid_targets_mean": 2441.1, | |
| "valid_targets_min": 2117 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.9373152237759808, | |
| "learning_rate": 9.19299238803515e-06, | |
| "loss": 0.2712, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11312133818864822, | |
| "step": 985, | |
| "valid_targets_mean": 1897.9, | |
| "valid_targets_min": 1095 | |
| }, | |
| { | |
| "epoch": 5.025380710659898, | |
| "grad_norm": 0.7926669218335288, | |
| "learning_rate": 8.980853129511584e-06, | |
| "loss": 0.2557, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15544377267360687, | |
| "step": 990, | |
| "valid_targets_mean": 3132.6, | |
| "valid_targets_min": 1287 | |
| }, | |
| { | |
| "epoch": 5.050761421319797, | |
| "grad_norm": 0.9251745469506645, | |
| "learning_rate": 8.770479251647708e-06, | |
| "loss": 0.2549, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10877332836389542, | |
| "step": 995, | |
| "valid_targets_mean": 2343.2, | |
| "valid_targets_min": 977 | |
| }, | |
| { | |
| "epoch": 5.0761421319796955, | |
| "grad_norm": 0.9272380990444373, | |
| "learning_rate": 8.561904458502424e-06, | |
| "loss": 0.2553, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16089127957820892, | |
| "step": 1000, | |
| "valid_targets_mean": 2838.4, | |
| "valid_targets_min": 997 | |
| }, | |
| { | |
| "epoch": 5.101522842639594, | |
| "grad_norm": 0.9657786473953105, | |
| "learning_rate": 8.355162165902785e-06, | |
| "loss": 0.2701, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11014437675476074, | |
| "step": 1005, | |
| "valid_targets_mean": 2422.8, | |
| "valid_targets_min": 508 | |
| }, | |
| { | |
| "epoch": 5.126903553299492, | |
| "grad_norm": 1.1440481143388213, | |
| "learning_rate": 8.150285496090388e-06, | |
| "loss": 0.2546, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11987794190645218, | |
| "step": 1010, | |
| "valid_targets_mean": 1924.6, | |
| "valid_targets_min": 1013 | |
| }, | |
| { | |
| "epoch": 5.152284263959391, | |
| "grad_norm": 0.9481516578247208, | |
| "learning_rate": 7.947307272414874e-06, | |
| "loss": 0.2666, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12054525315761566, | |
| "step": 1015, | |
| "valid_targets_mean": 2759.8, | |
| "valid_targets_min": 1979 | |
| }, | |
| { | |
| "epoch": 5.177664974619289, | |
| "grad_norm": 0.8900118367067522, | |
| "learning_rate": 7.746260014075293e-06, | |
| "loss": 0.2658, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1677006185054779, | |
| "step": 1020, | |
| "valid_targets_mean": 3171.0, | |
| "valid_targets_min": 1586 | |
| }, | |
| { | |
| "epoch": 5.2030456852791875, | |
| "grad_norm": 0.9446860024372655, | |
| "learning_rate": 7.547175930910187e-06, | |
| "loss": 0.2772, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12553545832633972, | |
| "step": 1025, | |
| "valid_targets_mean": 2253.2, | |
| "valid_targets_min": 989 | |
| }, | |
| { | |
| "epoch": 5.228426395939087, | |
| "grad_norm": 1.0092812527349864, | |
| "learning_rate": 7.350086918237238e-06, | |
| "loss": 0.2441, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13850677013397217, | |
| "step": 1030, | |
| "valid_targets_mean": 2710.8, | |
| "valid_targets_min": 1422 | |
| }, | |
| { | |
| "epoch": 5.253807106598985, | |
| "grad_norm": 0.9400152581379698, | |
| "learning_rate": 7.155024551743317e-06, | |
| "loss": 0.2669, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12904059886932373, | |
| "step": 1035, | |
| "valid_targets_mean": 2357.6, | |
| "valid_targets_min": 1317 | |
| }, | |
| { | |
| "epoch": 5.279187817258883, | |
| "grad_norm": 0.9286640611302993, | |
| "learning_rate": 6.962020082425749e-06, | |
| "loss": 0.2456, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11589126288890839, | |
| "step": 1040, | |
| "valid_targets_mean": 2505.4, | |
| "valid_targets_min": 1083 | |
| }, | |
| { | |
| "epoch": 5.304568527918782, | |
| "grad_norm": 0.9294621710654248, | |
| "learning_rate": 6.771104431585551e-06, | |
| "loss": 0.2519, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0984276682138443, | |
| "step": 1045, | |
| "valid_targets_mean": 1809.6, | |
| "valid_targets_min": 801 | |
| }, | |
| { | |
| "epoch": 5.32994923857868, | |
| "grad_norm": 1.0852871883002646, | |
| "learning_rate": 6.582308185873536e-06, | |
| "loss": 0.2748, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13740497827529907, | |
| "step": 1050, | |
| "valid_targets_mean": 2055.0, | |
| "valid_targets_min": 925 | |
| }, | |
| { | |
| "epoch": 5.355329949238579, | |
| "grad_norm": 0.8517072925008696, | |
| "learning_rate": 6.3956615923900214e-06, | |
| "loss": 0.2582, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13899219036102295, | |
| "step": 1055, | |
| "valid_targets_mean": 2988.9, | |
| "valid_targets_min": 926 | |
| }, | |
| { | |
| "epoch": 5.380710659898477, | |
| "grad_norm": 1.0064260155062217, | |
| "learning_rate": 6.211194553838931e-06, | |
| "loss": 0.2501, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15219058096408844, | |
| "step": 1060, | |
| "valid_targets_mean": 2646.2, | |
| "valid_targets_min": 960 | |
| }, | |
| { | |
| "epoch": 5.406091370558376, | |
| "grad_norm": 0.953883752640116, | |
| "learning_rate": 6.028936623737067e-06, | |
| "loss": 0.2543, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17078690230846405, | |
| "step": 1065, | |
| "valid_targets_mean": 3289.1, | |
| "valid_targets_min": 1180 | |
| }, | |
| { | |
| "epoch": 5.431472081218274, | |
| "grad_norm": 0.8490004002408478, | |
| "learning_rate": 5.848917001679339e-06, | |
| "loss": 0.2579, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13909263908863068, | |
| "step": 1070, | |
| "valid_targets_mean": 2893.4, | |
| "valid_targets_min": 299 | |
| }, | |
| { | |
| "epoch": 5.456852791878172, | |
| "grad_norm": 1.1776547438514429, | |
| "learning_rate": 5.671164528660687e-06, | |
| "loss": 0.2572, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14180824160575867, | |
| "step": 1075, | |
| "valid_targets_mean": 2674.9, | |
| "valid_targets_min": 787 | |
| }, | |
| { | |
| "epoch": 5.482233502538071, | |
| "grad_norm": 1.0869939396664519, | |
| "learning_rate": 5.495707682455464e-06, | |
| "loss": 0.2573, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12028077244758606, | |
| "step": 1080, | |
| "valid_targets_mean": 1954.1, | |
| "valid_targets_min": 395 | |
| }, | |
| { | |
| "epoch": 5.50761421319797, | |
| "grad_norm": 0.8850078662752396, | |
| "learning_rate": 5.322574573054991e-06, | |
| "loss": 0.247, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12053519487380981, | |
| "step": 1085, | |
| "valid_targets_mean": 3040.0, | |
| "valid_targets_min": 1024 | |
| }, | |
| { | |
| "epoch": 5.532994923857868, | |
| "grad_norm": 0.9136784171950727, | |
| "learning_rate": 5.151792938164051e-06, | |
| "loss": 0.2527, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12860670685768127, | |
| "step": 1090, | |
| "valid_targets_mean": 3044.2, | |
| "valid_targets_min": 911 | |
| }, | |
| { | |
| "epoch": 5.558375634517766, | |
| "grad_norm": 1.0173457673617277, | |
| "learning_rate": 4.983390138757027e-06, | |
| "loss": 0.249, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11640293151140213, | |
| "step": 1095, | |
| "valid_targets_mean": 2169.0, | |
| "valid_targets_min": 361 | |
| }, | |
| { | |
| "epoch": 5.583756345177665, | |
| "grad_norm": 0.9200254654181008, | |
| "learning_rate": 4.817393154694399e-06, | |
| "loss": 0.2634, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09641244262456894, | |
| "step": 1100, | |
| "valid_targets_mean": 1634.8, | |
| "valid_targets_min": 1155 | |
| }, | |
| { | |
| "epoch": 5.6091370558375635, | |
| "grad_norm": 0.9590562047545811, | |
| "learning_rate": 4.653828580400275e-06, | |
| "loss": 0.2325, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1131504476070404, | |
| "step": 1105, | |
| "valid_targets_mean": 2329.9, | |
| "valid_targets_min": 1633 | |
| }, | |
| { | |
| "epoch": 5.634517766497462, | |
| "grad_norm": 0.99617246926484, | |
| "learning_rate": 4.4927226206017e-06, | |
| "loss": 0.2703, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15851570665836334, | |
| "step": 1110, | |
| "valid_targets_mean": 2249.6, | |
| "valid_targets_min": 1396 | |
| }, | |
| { | |
| "epoch": 5.659898477157361, | |
| "grad_norm": 1.022233372274678, | |
| "learning_rate": 4.334101086130409e-06, | |
| "loss": 0.2426, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15162095427513123, | |
| "step": 1115, | |
| "valid_targets_mean": 2983.9, | |
| "valid_targets_min": 795 | |
| }, | |
| { | |
| "epoch": 5.685279187817259, | |
| "grad_norm": 0.9281317310810118, | |
| "learning_rate": 4.177989389787625e-06, | |
| "loss": 0.2505, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12421190738677979, | |
| "step": 1120, | |
| "valid_targets_mean": 2495.0, | |
| "valid_targets_min": 1154 | |
| }, | |
| { | |
| "epoch": 5.710659898477157, | |
| "grad_norm": 0.9507092254457873, | |
| "learning_rate": 4.024412542272706e-06, | |
| "loss": 0.249, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14842386543750763, | |
| "step": 1125, | |
| "valid_targets_mean": 2852.2, | |
| "valid_targets_min": 1436 | |
| }, | |
| { | |
| "epoch": 5.7360406091370555, | |
| "grad_norm": 0.9527860854462858, | |
| "learning_rate": 3.873395148176135e-06, | |
| "loss": 0.2659, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11915292590856552, | |
| "step": 1130, | |
| "valid_targets_mean": 2597.9, | |
| "valid_targets_min": 1289 | |
| }, | |
| { | |
| "epoch": 5.761421319796955, | |
| "grad_norm": 0.9308894637719127, | |
| "learning_rate": 3.724961402037661e-06, | |
| "loss": 0.2473, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12573911249637604, | |
| "step": 1135, | |
| "valid_targets_mean": 2650.9, | |
| "valid_targets_min": 1046 | |
| }, | |
| { | |
| "epoch": 5.786802030456853, | |
| "grad_norm": 0.8959759836358597, | |
| "learning_rate": 3.57913508447004e-06, | |
| "loss": 0.2448, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10931817442178726, | |
| "step": 1140, | |
| "valid_targets_mean": 2622.6, | |
| "valid_targets_min": 1115 | |
| }, | |
| { | |
| "epoch": 5.812182741116751, | |
| "grad_norm": 1.0985606002890143, | |
| "learning_rate": 3.4359395583491594e-06, | |
| "loss": 0.2441, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16124585270881653, | |
| "step": 1145, | |
| "valid_targets_mean": 2679.9, | |
| "valid_targets_min": 1145 | |
| }, | |
| { | |
| "epoch": 5.837563451776649, | |
| "grad_norm": 0.999132265083642, | |
| "learning_rate": 3.2953977650710513e-06, | |
| "loss": 0.2606, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12139269709587097, | |
| "step": 1150, | |
| "valid_targets_mean": 2462.9, | |
| "valid_targets_min": 1242 | |
| }, | |
| { | |
| "epoch": 5.862944162436548, | |
| "grad_norm": 0.9773626061096712, | |
| "learning_rate": 3.1575322208764714e-06, | |
| "loss": 0.246, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13929663598537445, | |
| "step": 1155, | |
| "valid_targets_mean": 2238.6, | |
| "valid_targets_min": 1388 | |
| }, | |
| { | |
| "epoch": 5.888324873096447, | |
| "grad_norm": 0.8513865275091448, | |
| "learning_rate": 3.0223650132435335e-06, | |
| "loss": 0.2597, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10711924731731415, | |
| "step": 1160, | |
| "valid_targets_mean": 2403.5, | |
| "valid_targets_min": 849 | |
| }, | |
| { | |
| "epoch": 5.913705583756345, | |
| "grad_norm": 1.0153155674068752, | |
| "learning_rate": 2.8899177973490734e-06, | |
| "loss": 0.2564, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13925692439079285, | |
| "step": 1165, | |
| "valid_targets_mean": 2212.4, | |
| "valid_targets_min": 957 | |
| }, | |
| { | |
| "epoch": 5.939086294416244, | |
| "grad_norm": 0.8959815269726535, | |
| "learning_rate": 2.7602117925992964e-06, | |
| "loss": 0.2538, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08277732878923416, | |
| "step": 1170, | |
| "valid_targets_mean": 2028.4, | |
| "valid_targets_min": 1009 | |
| }, | |
| { | |
| "epoch": 5.964467005076142, | |
| "grad_norm": 0.9079179631720385, | |
| "learning_rate": 2.6332677792301773e-06, | |
| "loss": 0.234, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11223854869604111, | |
| "step": 1175, | |
| "valid_targets_mean": 2755.0, | |
| "valid_targets_min": 1083 | |
| }, | |
| { | |
| "epoch": 5.98984771573604, | |
| "grad_norm": 0.9347317535565745, | |
| "learning_rate": 2.5091060949782664e-06, | |
| "loss": 0.2542, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14246557652950287, | |
| "step": 1180, | |
| "valid_targets_mean": 3177.5, | |
| "valid_targets_min": 1675 | |
| }, | |
| { | |
| "epoch": 6.0152284263959395, | |
| "grad_norm": 0.9650623750727744, | |
| "learning_rate": 2.3877466318223698e-06, | |
| "loss": 0.2437, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10319985449314117, | |
| "step": 1185, | |
| "valid_targets_mean": 2075.1, | |
| "valid_targets_min": 525 | |
| }, | |
| { | |
| "epoch": 6.040609137055838, | |
| "grad_norm": 0.8339126985510028, | |
| "learning_rate": 2.2692088327966655e-06, | |
| "loss": 0.2396, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10419311374425888, | |
| "step": 1190, | |
| "valid_targets_mean": 3008.8, | |
| "valid_targets_min": 1633 | |
| }, | |
| { | |
| "epoch": 6.065989847715736, | |
| "grad_norm": 0.7474364288192638, | |
| "learning_rate": 2.153511688875707e-06, | |
| "loss": 0.2258, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11047440022230148, | |
| "step": 1195, | |
| "valid_targets_mean": 3757.4, | |
| "valid_targets_min": 936 | |
| }, | |
| { | |
| "epoch": 6.091370558375634, | |
| "grad_norm": 0.98041966083737, | |
| "learning_rate": 2.0406737359318797e-06, | |
| "loss": 0.2629, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12280648946762085, | |
| "step": 1200, | |
| "valid_targets_mean": 2120.6, | |
| "valid_targets_min": 360 | |
| }, | |
| { | |
| "epoch": 6.116751269035533, | |
| "grad_norm": 1.0198475030804934, | |
| "learning_rate": 1.930713051765776e-06, | |
| "loss": 0.2483, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09961174428462982, | |
| "step": 1205, | |
| "valid_targets_mean": 1937.1, | |
| "valid_targets_min": 1127 | |
| }, | |
| { | |
| "epoch": 6.1421319796954315, | |
| "grad_norm": 0.9437418507000886, | |
| "learning_rate": 1.8236472532099413e-06, | |
| "loss": 0.2653, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09551151841878891, | |
| "step": 1210, | |
| "valid_targets_mean": 2320.2, | |
| "valid_targets_min": 1122 | |
| }, | |
| { | |
| "epoch": 6.16751269035533, | |
| "grad_norm": 1.0067081945002703, | |
| "learning_rate": 1.7194934933064654e-06, | |
| "loss": 0.2467, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11312228441238403, | |
| "step": 1215, | |
| "valid_targets_mean": 2155.1, | |
| "valid_targets_min": 1108 | |
| }, | |
| { | |
| "epoch": 6.192893401015229, | |
| "grad_norm": 0.890380894514622, | |
| "learning_rate": 1.6182684585588981e-06, | |
| "loss": 0.2391, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11702115833759308, | |
| "step": 1220, | |
| "valid_targets_mean": 2633.2, | |
| "valid_targets_min": 1372 | |
| }, | |
| { | |
| "epoch": 6.218274111675127, | |
| "grad_norm": 0.9041801647836321, | |
| "learning_rate": 1.5199883662588954e-06, | |
| "loss": 0.2351, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10423216968774796, | |
| "step": 1225, | |
| "valid_targets_mean": 2829.2, | |
| "valid_targets_min": 1190 | |
| }, | |
| { | |
| "epoch": 6.243654822335025, | |
| "grad_norm": 0.971484442221189, | |
| "learning_rate": 1.4246689618880472e-06, | |
| "loss": 0.2509, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12599416077136993, | |
| "step": 1230, | |
| "valid_targets_mean": 2065.6, | |
| "valid_targets_min": 1166 | |
| }, | |
| { | |
| "epoch": 6.269035532994923, | |
| "grad_norm": 1.0397334165105048, | |
| "learning_rate": 1.3323255165952875e-06, | |
| "loss": 0.2452, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1000349223613739, | |
| "step": 1235, | |
| "valid_targets_mean": 1802.4, | |
| "valid_targets_min": 1041 | |
| }, | |
| { | |
| "epoch": 6.2944162436548226, | |
| "grad_norm": 0.9722470098317174, | |
| "learning_rate": 1.2429728247502926e-06, | |
| "loss": 0.2372, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1916620135307312, | |
| "step": 1240, | |
| "valid_targets_mean": 3071.5, | |
| "valid_targets_min": 876 | |
| }, | |
| { | |
| "epoch": 6.319796954314721, | |
| "grad_norm": 1.0179156477550857, | |
| "learning_rate": 1.156625201573287e-06, | |
| "loss": 0.2529, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1264421045780182, | |
| "step": 1245, | |
| "valid_targets_mean": 2721.6, | |
| "valid_targets_min": 293 | |
| }, | |
| { | |
| "epoch": 6.345177664974619, | |
| "grad_norm": 1.048287346549327, | |
| "learning_rate": 1.0732964808415792e-06, | |
| "loss": 0.2599, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11165352165699005, | |
| "step": 1250, | |
| "valid_targets_mean": 2093.5, | |
| "valid_targets_min": 1116 | |
| }, | |
| { | |
| "epoch": 6.370558375634518, | |
| "grad_norm": 0.8163389750191538, | |
| "learning_rate": 9.93000012673262e-07, | |
| "loss": 0.228, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14192290604114532, | |
| "step": 1255, | |
| "valid_targets_mean": 3407.8, | |
| "valid_targets_min": 1010 | |
| }, | |
| { | |
| "epoch": 6.395939086294416, | |
| "grad_norm": 1.0294994216450304, | |
| "learning_rate": 9.157486613883759e-07, | |
| "loss": 0.2437, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13059166073799133, | |
| "step": 1260, | |
| "valid_targets_mean": 1657.2, | |
| "valid_targets_min": 913 | |
| }, | |
| { | |
| "epoch": 6.4213197969543145, | |
| "grad_norm": 1.0422176024676824, | |
| "learning_rate": 8.415548034479215e-07, | |
| "loss": 0.255, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1064755767583847, | |
| "step": 1265, | |
| "valid_targets_mean": 2209.9, | |
| "valid_targets_min": 1136 | |
| }, | |
| { | |
| "epoch": 6.446700507614214, | |
| "grad_norm": 1.235014195884799, | |
| "learning_rate": 7.704303254710188e-07, | |
| "loss": 0.2317, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09507836401462555, | |
| "step": 1270, | |
| "valid_targets_mean": 2677.4, | |
| "valid_targets_min": 1014 | |
| }, | |
| { | |
| "epoch": 6.472081218274112, | |
| "grad_norm": 1.0549720907684328, | |
| "learning_rate": 7.023866223305487e-07, | |
| "loss": 0.2469, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11979396641254425, | |
| "step": 1275, | |
| "valid_targets_mean": 2131.8, | |
| "valid_targets_min": 981 | |
| }, | |
| { | |
| "epoch": 6.49746192893401, | |
| "grad_norm": 0.992296954375115, | |
| "learning_rate": 6.374345953275773e-07, | |
| "loss": 0.2456, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11428581178188324, | |
| "step": 1280, | |
| "valid_targets_mean": 2016.8, | |
| "valid_targets_min": 1251 | |
| }, | |
| { | |
| "epoch": 6.522842639593908, | |
| "grad_norm": 1.0032523115406662, | |
| "learning_rate": 5.755846504448604e-07, | |
| "loss": 0.2371, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11331555247306824, | |
| "step": 1285, | |
| "valid_targets_mean": 2159.6, | |
| "valid_targets_min": 1436 | |
| }, | |
| { | |
| "epoch": 6.548223350253807, | |
| "grad_norm": 0.9517700067388988, | |
| "learning_rate": 5.16846696679687e-07, | |
| "loss": 0.2582, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11859509348869324, | |
| "step": 1290, | |
| "valid_targets_mean": 2427.4, | |
| "valid_targets_min": 1475 | |
| }, | |
| { | |
| "epoch": 6.573604060913706, | |
| "grad_norm": 0.7721200421281694, | |
| "learning_rate": 4.6123014445636605e-07, | |
| "loss": 0.2443, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08160974085330963, | |
| "step": 1295, | |
| "valid_targets_mean": 2601.8, | |
| "valid_targets_min": 1040 | |
| }, | |
| { | |
| "epoch": 6.598984771573604, | |
| "grad_norm": 0.9140077477818886, | |
| "learning_rate": 4.087439041185781e-07, | |
| "loss": 0.2393, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09139326214790344, | |
| "step": 1300, | |
| "valid_targets_mean": 2322.6, | |
| "valid_targets_min": 832 | |
| }, | |
| { | |
| "epoch": 6.624365482233502, | |
| "grad_norm": 1.07004687623224, | |
| "learning_rate": 3.5939638450183776e-07, | |
| "loss": 0.2574, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16664224863052368, | |
| "step": 1305, | |
| "valid_targets_mean": 2209.2, | |
| "valid_targets_min": 896 | |
| }, | |
| { | |
| "epoch": 6.649746192893401, | |
| "grad_norm": 0.9914655537015288, | |
| "learning_rate": 3.1319549158632444e-07, | |
| "loss": 0.2342, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09106150269508362, | |
| "step": 1310, | |
| "valid_targets_mean": 1496.8, | |
| "valid_targets_min": 846 | |
| }, | |
| { | |
| "epoch": 6.675126903553299, | |
| "grad_norm": 0.998951464628241, | |
| "learning_rate": 2.701486272302534e-07, | |
| "loss": 0.2364, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09747333824634552, | |
| "step": 1315, | |
| "valid_targets_mean": 1789.2, | |
| "valid_targets_min": 990 | |
| }, | |
| { | |
| "epoch": 6.700507614213198, | |
| "grad_norm": 0.9002671317698705, | |
| "learning_rate": 2.302626879840353e-07, | |
| "loss": 0.2565, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13508570194244385, | |
| "step": 1320, | |
| "valid_targets_mean": 2941.5, | |
| "valid_targets_min": 1487 | |
| }, | |
| { | |
| "epoch": 6.725888324873097, | |
| "grad_norm": 0.8743963104207289, | |
| "learning_rate": 1.9354406398535363e-07, | |
| "loss": 0.2309, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13274939358234406, | |
| "step": 1325, | |
| "valid_targets_mean": 3342.8, | |
| "valid_targets_min": 1121 | |
| }, | |
| { | |
| "epoch": 6.751269035532995, | |
| "grad_norm": 0.9310217194018677, | |
| "learning_rate": 1.599986379354257e-07, | |
| "loss": 0.2524, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1376674473285675, | |
| "step": 1330, | |
| "valid_targets_mean": 2423.2, | |
| "valid_targets_min": 743 | |
| }, | |
| { | |
| "epoch": 6.776649746192893, | |
| "grad_norm": 0.9280165757856627, | |
| "learning_rate": 1.29631784156512e-07, | |
| "loss": 0.2246, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14213162660598755, | |
| "step": 1335, | |
| "valid_targets_mean": 2780.5, | |
| "valid_targets_min": 1301 | |
| }, | |
| { | |
| "epoch": 6.802030456852792, | |
| "grad_norm": 0.9707954483311023, | |
| "learning_rate": 1.0244836773091182e-07, | |
| "loss": 0.2299, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12803569436073303, | |
| "step": 1340, | |
| "valid_targets_mean": 3245.4, | |
| "valid_targets_min": 977 | |
| }, | |
| { | |
| "epoch": 6.8274111675126905, | |
| "grad_norm": 1.1217146591780642, | |
| "learning_rate": 7.845274372151767e-08, | |
| "loss": 0.2366, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09864965081214905, | |
| "step": 1345, | |
| "valid_targets_mean": 1694.6, | |
| "valid_targets_min": 787 | |
| }, | |
| { | |
| "epoch": 6.852791878172589, | |
| "grad_norm": 0.8664511542186972, | |
| "learning_rate": 5.7648756474084636e-08, | |
| "loss": 0.2389, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09746871888637543, | |
| "step": 1350, | |
| "valid_targets_mean": 1966.5, | |
| "valid_targets_min": 417 | |
| }, | |
| { | |
| "epoch": 6.878172588832487, | |
| "grad_norm": 0.93357375736198, | |
| "learning_rate": 4.003973900133851e-08, | |
| "loss": 0.2403, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1525709331035614, | |
| "step": 1355, | |
| "valid_targets_mean": 2738.1, | |
| "valid_targets_min": 508 | |
| }, | |
| { | |
| "epoch": 6.903553299492386, | |
| "grad_norm": 0.9786494242138831, | |
| "learning_rate": 2.5628512448987453e-08, | |
| "loss": 0.2497, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13262221217155457, | |
| "step": 1360, | |
| "valid_targets_mean": 2689.0, | |
| "valid_targets_min": 957 | |
| }, | |
| { | |
| "epoch": 6.928934010152284, | |
| "grad_norm": 1.0963747097882328, | |
| "learning_rate": 1.4417385643741289e-08, | |
| "loss": 0.2481, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13900740444660187, | |
| "step": 1365, | |
| "valid_targets_mean": 2598.9, | |
| "valid_targets_min": 1350 | |
| }, | |
| { | |
| "epoch": 6.9543147208121825, | |
| "grad_norm": 0.9462888101664159, | |
| "learning_rate": 6.408154723420712e-09, | |
| "loss": 0.2395, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10249761492013931, | |
| "step": 1370, | |
| "valid_targets_mean": 2344.4, | |
| "valid_targets_min": 994 | |
| }, | |
| { | |
| "epoch": 6.979695431472082, | |
| "grad_norm": 0.9690061063022448, | |
| "learning_rate": 1.6021028491941538e-09, | |
| "loss": 0.2374, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1248205229640007, | |
| "step": 1375, | |
| "valid_targets_mean": 2592.9, | |
| "valid_targets_min": 1056 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12962277233600616, | |
| "step": 1379, | |
| "total_flos": 2.2298248732265677e+17, | |
| "train_loss": 0.3448181322210504, | |
| "train_runtime": 6375.7964, | |
| "train_samples_per_second": 3.456, | |
| "train_steps_per_second": 0.216, | |
| "valid_targets_mean": 2546.5, | |
| "valid_targets_min": 1046 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 1379, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 7, | |
| "save_steps": 1500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.2298248732265677e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |