| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 785, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.032, | |
| "grad_norm": 7.992597780897032, | |
| "learning_rate": 2.0253164556962026e-06, | |
| "loss": 0.7346, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1965068280696869, | |
| "step": 5, | |
| "valid_targets_mean": 5113.5, | |
| "valid_targets_min": 3169 | |
| }, | |
| { | |
| "epoch": 0.064, | |
| "grad_norm": 4.905164971122138, | |
| "learning_rate": 4.556962025316456e-06, | |
| "loss": 0.7062, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16786593198776245, | |
| "step": 10, | |
| "valid_targets_mean": 5742.6, | |
| "valid_targets_min": 4001 | |
| }, | |
| { | |
| "epoch": 0.096, | |
| "grad_norm": 2.4265542623454994, | |
| "learning_rate": 7.08860759493671e-06, | |
| "loss": 0.6469, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1609123945236206, | |
| "step": 15, | |
| "valid_targets_mean": 5070.8, | |
| "valid_targets_min": 1187 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "grad_norm": 0.9695481834207988, | |
| "learning_rate": 9.620253164556963e-06, | |
| "loss": 0.5955, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.152688130736351, | |
| "step": 20, | |
| "valid_targets_mean": 5043.4, | |
| "valid_targets_min": 3778 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 0.8556904894549953, | |
| "learning_rate": 1.2151898734177216e-05, | |
| "loss": 0.5631, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1266036331653595, | |
| "step": 25, | |
| "valid_targets_mean": 5028.5, | |
| "valid_targets_min": 3598 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "grad_norm": 0.5406877764182748, | |
| "learning_rate": 1.468354430379747e-05, | |
| "loss": 0.5366, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12929826974868774, | |
| "step": 30, | |
| "valid_targets_mean": 5412.2, | |
| "valid_targets_min": 3452 | |
| }, | |
| { | |
| "epoch": 0.224, | |
| "grad_norm": 0.4491232079388601, | |
| "learning_rate": 1.7215189873417723e-05, | |
| "loss": 0.5279, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12588968873023987, | |
| "step": 35, | |
| "valid_targets_mean": 5830.6, | |
| "valid_targets_min": 1236 | |
| }, | |
| { | |
| "epoch": 0.256, | |
| "grad_norm": 0.3734935489185054, | |
| "learning_rate": 1.974683544303798e-05, | |
| "loss": 0.4878, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1219080314040184, | |
| "step": 40, | |
| "valid_targets_mean": 5462.8, | |
| "valid_targets_min": 3876 | |
| }, | |
| { | |
| "epoch": 0.288, | |
| "grad_norm": 0.3243375480472286, | |
| "learning_rate": 2.2278481012658228e-05, | |
| "loss": 0.4516, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11703348159790039, | |
| "step": 45, | |
| "valid_targets_mean": 5427.9, | |
| "valid_targets_min": 3616 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 0.2882643420638927, | |
| "learning_rate": 2.481012658227848e-05, | |
| "loss": 0.4482, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10204213112592697, | |
| "step": 50, | |
| "valid_targets_mean": 5454.8, | |
| "valid_targets_min": 1708 | |
| }, | |
| { | |
| "epoch": 0.352, | |
| "grad_norm": 0.2769504438340826, | |
| "learning_rate": 2.7341772151898737e-05, | |
| "loss": 0.4342, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11272697150707245, | |
| "step": 55, | |
| "valid_targets_mean": 5620.4, | |
| "valid_targets_min": 1322 | |
| }, | |
| { | |
| "epoch": 0.384, | |
| "grad_norm": 0.2620427633098365, | |
| "learning_rate": 2.987341772151899e-05, | |
| "loss": 0.4211, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10937047749757767, | |
| "step": 60, | |
| "valid_targets_mean": 5090.8, | |
| "valid_targets_min": 3842 | |
| }, | |
| { | |
| "epoch": 0.416, | |
| "grad_norm": 0.2463435677646114, | |
| "learning_rate": 3.240506329113924e-05, | |
| "loss": 0.4135, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10371287912130356, | |
| "step": 65, | |
| "valid_targets_mean": 5944.2, | |
| "valid_targets_min": 3980 | |
| }, | |
| { | |
| "epoch": 0.448, | |
| "grad_norm": 0.2439380748245345, | |
| "learning_rate": 3.49367088607595e-05, | |
| "loss": 0.4112, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11104129254817963, | |
| "step": 70, | |
| "valid_targets_mean": 6091.2, | |
| "valid_targets_min": 4103 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 0.2592892650393887, | |
| "learning_rate": 3.746835443037975e-05, | |
| "loss": 0.392, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09229408204555511, | |
| "step": 75, | |
| "valid_targets_mean": 5581.8, | |
| "valid_targets_min": 3931 | |
| }, | |
| { | |
| "epoch": 0.512, | |
| "grad_norm": 0.27965263535503615, | |
| "learning_rate": 4e-05, | |
| "loss": 0.3945, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09553131461143494, | |
| "step": 80, | |
| "valid_targets_mean": 5852.2, | |
| "valid_targets_min": 3736 | |
| }, | |
| { | |
| "epoch": 0.544, | |
| "grad_norm": 0.23703955142453972, | |
| "learning_rate": 3.999504991751045e-05, | |
| "loss": 0.3795, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09883347153663635, | |
| "step": 85, | |
| "valid_targets_mean": 6116.4, | |
| "valid_targets_min": 1188 | |
| }, | |
| { | |
| "epoch": 0.576, | |
| "grad_norm": 0.27006174007491235, | |
| "learning_rate": 3.9980202120373464e-05, | |
| "loss": 0.392, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09063629806041718, | |
| "step": 90, | |
| "valid_targets_mean": 4680.5, | |
| "valid_targets_min": 638 | |
| }, | |
| { | |
| "epoch": 0.608, | |
| "grad_norm": 0.26947357129102495, | |
| "learning_rate": 3.995546395837111e-05, | |
| "loss": 0.3846, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1091674342751503, | |
| "step": 95, | |
| "valid_targets_mean": 6072.8, | |
| "valid_targets_min": 3603 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 0.2927613874979733, | |
| "learning_rate": 3.992084767709763e-05, | |
| "loss": 0.3735, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09993791580200195, | |
| "step": 100, | |
| "valid_targets_mean": 5355.8, | |
| "valid_targets_min": 826 | |
| }, | |
| { | |
| "epoch": 0.672, | |
| "grad_norm": 0.24121791608110918, | |
| "learning_rate": 3.987637041189781e-05, | |
| "loss": 0.3845, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09499676525592804, | |
| "step": 105, | |
| "valid_targets_mean": 6090.1, | |
| "valid_targets_min": 3190 | |
| }, | |
| { | |
| "epoch": 0.704, | |
| "grad_norm": 0.24568965637675186, | |
| "learning_rate": 3.982205417938482e-05, | |
| "loss": 0.3783, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09854920208454132, | |
| "step": 110, | |
| "valid_targets_mean": 5208.7, | |
| "valid_targets_min": 3716 | |
| }, | |
| { | |
| "epoch": 0.736, | |
| "grad_norm": 0.2440314012575419, | |
| "learning_rate": 3.975792586654179e-05, | |
| "loss": 0.3661, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08172737061977386, | |
| "step": 115, | |
| "valid_targets_mean": 5737.1, | |
| "valid_targets_min": 2032 | |
| }, | |
| { | |
| "epoch": 0.768, | |
| "grad_norm": 0.25094578582914895, | |
| "learning_rate": 3.968401721741259e-05, | |
| "loss": 0.3729, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08649879693984985, | |
| "step": 120, | |
| "valid_targets_mean": 4845.7, | |
| "valid_targets_min": 2490 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.2454746912712756, | |
| "learning_rate": 3.960036481738819e-05, | |
| "loss": 0.3772, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08654075860977173, | |
| "step": 125, | |
| "valid_targets_mean": 5857.2, | |
| "valid_targets_min": 4130 | |
| }, | |
| { | |
| "epoch": 0.832, | |
| "grad_norm": 0.27286819412305224, | |
| "learning_rate": 3.950701007509667e-05, | |
| "loss": 0.3698, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0876830592751503, | |
| "step": 130, | |
| "valid_targets_mean": 6018.9, | |
| "valid_targets_min": 3598 | |
| }, | |
| { | |
| "epoch": 0.864, | |
| "grad_norm": 0.24040936227390755, | |
| "learning_rate": 3.940399920190552e-05, | |
| "loss": 0.3615, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09337051212787628, | |
| "step": 135, | |
| "valid_targets_mean": 5848.9, | |
| "valid_targets_min": 4036 | |
| }, | |
| { | |
| "epoch": 0.896, | |
| "grad_norm": 0.26092439770692516, | |
| "learning_rate": 3.92913831890467e-05, | |
| "loss": 0.3687, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0913911908864975, | |
| "step": 140, | |
| "valid_targets_mean": 5912.9, | |
| "valid_targets_min": 4229 | |
| }, | |
| { | |
| "epoch": 0.928, | |
| "grad_norm": 0.24371078285635853, | |
| "learning_rate": 3.916921778237556e-05, | |
| "loss": 0.3664, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08543594181537628, | |
| "step": 145, | |
| "valid_targets_mean": 5582.9, | |
| "valid_targets_min": 3696 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.217048732240177, | |
| "learning_rate": 3.903756345477612e-05, | |
| "loss": 0.3586, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08604587614536285, | |
| "step": 150, | |
| "valid_targets_mean": 5474.2, | |
| "valid_targets_min": 3167 | |
| }, | |
| { | |
| "epoch": 0.992, | |
| "grad_norm": 0.26711655142264756, | |
| "learning_rate": 3.889648537622657e-05, | |
| "loss": 0.3612, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08616804331541061, | |
| "step": 155, | |
| "valid_targets_mean": 5076.6, | |
| "valid_targets_min": 709 | |
| }, | |
| { | |
| "epoch": 1.0192, | |
| "grad_norm": 0.24511649097845134, | |
| "learning_rate": 3.874605338153952e-05, | |
| "loss": 0.3586, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.090129055082798, | |
| "step": 160, | |
| "valid_targets_mean": 5228.8, | |
| "valid_targets_min": 3914 | |
| }, | |
| { | |
| "epoch": 1.0512, | |
| "grad_norm": 0.25785229272339794, | |
| "learning_rate": 3.8586341935793265e-05, | |
| "loss": 0.3559, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07839885354042053, | |
| "step": 165, | |
| "valid_targets_mean": 4965.1, | |
| "valid_targets_min": 1130 | |
| }, | |
| { | |
| "epoch": 1.0832, | |
| "grad_norm": 0.26965616757629585, | |
| "learning_rate": 3.841743009747089e-05, | |
| "loss": 0.3532, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08936138451099396, | |
| "step": 170, | |
| "valid_targets_mean": 6028.9, | |
| "valid_targets_min": 3924 | |
| }, | |
| { | |
| "epoch": 1.1152, | |
| "grad_norm": 0.2253315208645885, | |
| "learning_rate": 3.8239401479325714e-05, | |
| "loss": 0.3526, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08879570662975311, | |
| "step": 175, | |
| "valid_targets_mean": 5262.8, | |
| "valid_targets_min": 3437 | |
| }, | |
| { | |
| "epoch": 1.1472, | |
| "grad_norm": 0.2225876874756375, | |
| "learning_rate": 3.8052344206992276e-05, | |
| "loss": 0.3571, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08561760932207108, | |
| "step": 180, | |
| "valid_targets_mean": 5392.8, | |
| "valid_targets_min": 3094 | |
| }, | |
| { | |
| "epoch": 1.1792, | |
| "grad_norm": 0.25198165027120606, | |
| "learning_rate": 3.7856350875363396e-05, | |
| "loss": 0.3588, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08976033329963684, | |
| "step": 185, | |
| "valid_targets_mean": 5986.1, | |
| "valid_targets_min": 671 | |
| }, | |
| { | |
| "epoch": 1.2112, | |
| "grad_norm": 0.2340713016794323, | |
| "learning_rate": 3.765151850275497e-05, | |
| "loss": 0.3493, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08710001409053802, | |
| "step": 190, | |
| "valid_targets_mean": 5964.0, | |
| "valid_targets_min": 3471 | |
| }, | |
| { | |
| "epoch": 1.2432, | |
| "grad_norm": 0.2342881184173051, | |
| "learning_rate": 3.7437948482881104e-05, | |
| "loss": 0.3452, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0860249400138855, | |
| "step": 195, | |
| "valid_targets_mean": 6094.3, | |
| "valid_targets_min": 3818 | |
| }, | |
| { | |
| "epoch": 1.2752, | |
| "grad_norm": 0.2525665937119514, | |
| "learning_rate": 3.721574653466336e-05, | |
| "loss": 0.3504, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08649658411741257, | |
| "step": 200, | |
| "valid_targets_mean": 6088.2, | |
| "valid_targets_min": 3539 | |
| }, | |
| { | |
| "epoch": 1.3072, | |
| "grad_norm": 0.2274695837603368, | |
| "learning_rate": 3.698502264989903e-05, | |
| "loss": 0.3452, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0824115127325058, | |
| "step": 205, | |
| "valid_targets_mean": 5638.9, | |
| "valid_targets_min": 4279 | |
| }, | |
| { | |
| "epoch": 1.3392, | |
| "grad_norm": 0.26782770813054557, | |
| "learning_rate": 3.674589103881432e-05, | |
| "loss": 0.3476, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09814299643039703, | |
| "step": 210, | |
| "valid_targets_mean": 5398.6, | |
| "valid_targets_min": 3777 | |
| }, | |
| { | |
| "epoch": 1.3712, | |
| "grad_norm": 0.25592916799225457, | |
| "learning_rate": 3.64984700735293e-05, | |
| "loss": 0.3524, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08779767900705338, | |
| "step": 215, | |
| "valid_targets_mean": 5396.3, | |
| "valid_targets_min": 4022 | |
| }, | |
| { | |
| "epoch": 1.4032, | |
| "grad_norm": 0.26084030673703285, | |
| "learning_rate": 3.624288222946273e-05, | |
| "loss": 0.3556, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08763320744037628, | |
| "step": 220, | |
| "valid_targets_mean": 5151.4, | |
| "valid_targets_min": 1167 | |
| }, | |
| { | |
| "epoch": 1.4352, | |
| "grad_norm": 0.2297953014020372, | |
| "learning_rate": 3.597925402470578e-05, | |
| "loss": 0.3466, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08031567931175232, | |
| "step": 225, | |
| "valid_targets_mean": 5586.9, | |
| "valid_targets_min": 3479 | |
| }, | |
| { | |
| "epoch": 1.4672, | |
| "grad_norm": 0.25603642117773706, | |
| "learning_rate": 3.570771595739445e-05, | |
| "loss": 0.3455, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09005199372768402, | |
| "step": 230, | |
| "valid_targets_mean": 5418.8, | |
| "valid_targets_min": 3343 | |
| }, | |
| { | |
| "epoch": 1.4992, | |
| "grad_norm": 0.2259912054719558, | |
| "learning_rate": 3.5428402441111964e-05, | |
| "loss": 0.3501, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08260194212198257, | |
| "step": 235, | |
| "valid_targets_mean": 5300.5, | |
| "valid_targets_min": 2971 | |
| }, | |
| { | |
| "epoch": 1.5312000000000001, | |
| "grad_norm": 0.29512010642162345, | |
| "learning_rate": 3.5141451738352936e-05, | |
| "loss": 0.3516, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08057820051908493, | |
| "step": 240, | |
| "valid_targets_mean": 4825.9, | |
| "valid_targets_min": 757 | |
| }, | |
| { | |
| "epoch": 1.5632000000000001, | |
| "grad_norm": 0.2357451414355105, | |
| "learning_rate": 3.4847005892082266e-05, | |
| "loss": 0.3447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08529014140367508, | |
| "step": 245, | |
| "valid_targets_mean": 5788.1, | |
| "valid_targets_min": 292 | |
| }, | |
| { | |
| "epoch": 1.5952, | |
| "grad_norm": 0.243031110151129, | |
| "learning_rate": 3.454521065542273e-05, | |
| "loss": 0.3403, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08439987897872925, | |
| "step": 250, | |
| "valid_targets_mean": 5117.9, | |
| "valid_targets_min": 2601 | |
| }, | |
| { | |
| "epoch": 1.6272, | |
| "grad_norm": 0.2631373773545141, | |
| "learning_rate": 3.423621541950597e-05, | |
| "loss": 0.3406, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08441022783517838, | |
| "step": 255, | |
| "valid_targets_mean": 5009.3, | |
| "valid_targets_min": 1060 | |
| }, | |
| { | |
| "epoch": 1.6592, | |
| "grad_norm": 0.25653559181125485, | |
| "learning_rate": 3.3920173139522664e-05, | |
| "loss": 0.3475, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09525300562381744, | |
| "step": 260, | |
| "valid_targets_mean": 6077.0, | |
| "valid_targets_min": 4176 | |
| }, | |
| { | |
| "epoch": 1.6912, | |
| "grad_norm": 0.21963523463947673, | |
| "learning_rate": 3.35972402590084e-05, | |
| "loss": 0.3334, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08650698512792587, | |
| "step": 265, | |
| "valid_targets_mean": 6181.9, | |
| "valid_targets_min": 3567 | |
| }, | |
| { | |
| "epoch": 1.7231999999999998, | |
| "grad_norm": 0.2654405120685201, | |
| "learning_rate": 3.326757663240291e-05, | |
| "loss": 0.3428, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08159274607896805, | |
| "step": 270, | |
| "valid_targets_mean": 5128.4, | |
| "valid_targets_min": 1748 | |
| }, | |
| { | |
| "epoch": 1.7551999999999999, | |
| "grad_norm": 0.24111341689755342, | |
| "learning_rate": 3.293134544592073e-05, | |
| "loss": 0.3408, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08390471339225769, | |
| "step": 275, | |
| "valid_targets_mean": 5030.9, | |
| "valid_targets_min": 4017 | |
| }, | |
| { | |
| "epoch": 1.7872, | |
| "grad_norm": 0.2168712885276149, | |
| "learning_rate": 3.258871313677274e-05, | |
| "loss": 0.3408, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08097031712532043, | |
| "step": 280, | |
| "valid_targets_mean": 5817.1, | |
| "valid_targets_min": 1078 | |
| }, | |
| { | |
| "epoch": 1.8192, | |
| "grad_norm": 0.24477381681141186, | |
| "learning_rate": 3.2239849310778316e-05, | |
| "loss": 0.3442, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08444704860448837, | |
| "step": 285, | |
| "valid_targets_mean": 5124.5, | |
| "valid_targets_min": 2765 | |
| }, | |
| { | |
| "epoch": 1.8512, | |
| "grad_norm": 0.23685277446824146, | |
| "learning_rate": 3.188492665840909e-05, | |
| "loss": 0.3392, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0879988819360733, | |
| "step": 290, | |
| "valid_targets_mean": 5757.4, | |
| "valid_targets_min": 4257 | |
| }, | |
| { | |
| "epoch": 1.8832, | |
| "grad_norm": 0.24431534481839284, | |
| "learning_rate": 3.1524120869305726e-05, | |
| "loss": 0.3477, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0799805298447609, | |
| "step": 295, | |
| "valid_targets_mean": 5013.1, | |
| "valid_targets_min": 2408 | |
| }, | |
| { | |
| "epoch": 1.9152, | |
| "grad_norm": 0.23717088002345657, | |
| "learning_rate": 3.11576105453101e-05, | |
| "loss": 0.336, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08305468410253525, | |
| "step": 300, | |
| "valid_targets_mean": 5636.8, | |
| "valid_targets_min": 3806 | |
| }, | |
| { | |
| "epoch": 1.9472, | |
| "grad_norm": 0.23843928568213113, | |
| "learning_rate": 3.0785577112055916e-05, | |
| "loss": 0.3325, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08401763439178467, | |
| "step": 305, | |
| "valid_targets_mean": 5015.1, | |
| "valid_targets_min": 4040 | |
| }, | |
| { | |
| "epoch": 1.9792, | |
| "grad_norm": 0.22230671454556905, | |
| "learning_rate": 3.040820472916153e-05, | |
| "loss": 0.338, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08706588298082352, | |
| "step": 310, | |
| "valid_targets_mean": 5607.3, | |
| "valid_targets_min": 2762 | |
| }, | |
| { | |
| "epoch": 2.0064, | |
| "grad_norm": 0.24335412699803988, | |
| "learning_rate": 3.002568019906939e-05, | |
| "loss": 0.3354, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08025571703910828, | |
| "step": 315, | |
| "valid_targets_mean": 5478.8, | |
| "valid_targets_min": 3702 | |
| }, | |
| { | |
| "epoch": 2.0384, | |
| "grad_norm": 0.24330841804288073, | |
| "learning_rate": 2.963819287457733e-05, | |
| "loss": 0.3357, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07753533124923706, | |
| "step": 320, | |
| "valid_targets_mean": 6092.9, | |
| "valid_targets_min": 3663 | |
| }, | |
| { | |
| "epoch": 2.0704, | |
| "grad_norm": 0.25064283317993247, | |
| "learning_rate": 2.924593456510733e-05, | |
| "loss": 0.3356, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08604463934898376, | |
| "step": 325, | |
| "valid_targets_mean": 5441.4, | |
| "valid_targets_min": 3454 | |
| }, | |
| { | |
| "epoch": 2.1024, | |
| "grad_norm": 0.24013319984882203, | |
| "learning_rate": 2.8849099441758306e-05, | |
| "loss": 0.3402, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0855434238910675, | |
| "step": 330, | |
| "valid_targets_mean": 6197.1, | |
| "valid_targets_min": 4166 | |
| }, | |
| { | |
| "epoch": 2.1344, | |
| "grad_norm": 0.23527928586707625, | |
| "learning_rate": 2.844788394118979e-05, | |
| "loss": 0.3414, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07986783981323242, | |
| "step": 335, | |
| "valid_targets_mean": 4266.2, | |
| "valid_targets_min": 1054 | |
| }, | |
| { | |
| "epoch": 2.1664, | |
| "grad_norm": 0.2476030088095561, | |
| "learning_rate": 2.8042486668384164e-05, | |
| "loss": 0.3335, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08800746500492096, | |
| "step": 340, | |
| "valid_targets_mean": 5671.7, | |
| "valid_targets_min": 2845 | |
| }, | |
| { | |
| "epoch": 2.1984, | |
| "grad_norm": 0.2212675652031375, | |
| "learning_rate": 2.7633108298335582e-05, | |
| "loss": 0.3239, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0804094448685646, | |
| "step": 345, | |
| "valid_targets_mean": 6075.1, | |
| "valid_targets_min": 2695 | |
| }, | |
| { | |
| "epoch": 2.2304, | |
| "grad_norm": 0.25952169809602754, | |
| "learning_rate": 2.721995147671416e-05, | |
| "loss": 0.323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09103801101446152, | |
| "step": 350, | |
| "valid_targets_mean": 5498.5, | |
| "valid_targets_min": 2659 | |
| }, | |
| { | |
| "epoch": 2.2624, | |
| "grad_norm": 0.2383291871019339, | |
| "learning_rate": 2.68032207195547e-05, | |
| "loss": 0.3403, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09590868651866913, | |
| "step": 355, | |
| "valid_targets_mean": 5983.8, | |
| "valid_targets_min": 3808 | |
| }, | |
| { | |
| "epoch": 2.2944, | |
| "grad_norm": 0.2442383332052773, | |
| "learning_rate": 2.6383122312019604e-05, | |
| "loss": 0.3331, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09253934025764465, | |
| "step": 360, | |
| "valid_targets_mean": 5522.2, | |
| "valid_targets_min": 4197 | |
| }, | |
| { | |
| "epoch": 2.3264, | |
| "grad_norm": 0.26697574295179705, | |
| "learning_rate": 2.595986420628597e-05, | |
| "loss": 0.3431, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08339850604534149, | |
| "step": 365, | |
| "valid_targets_mean": 5238.8, | |
| "valid_targets_min": 4166 | |
| }, | |
| { | |
| "epoch": 2.3584, | |
| "grad_norm": 0.22410653282363405, | |
| "learning_rate": 2.5533655918607573e-05, | |
| "loss": 0.3386, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07287449389696121, | |
| "step": 370, | |
| "valid_targets_mean": 4816.2, | |
| "valid_targets_min": 1188 | |
| }, | |
| { | |
| "epoch": 2.3904, | |
| "grad_norm": 0.24337444824543214, | |
| "learning_rate": 2.510470842560259e-05, | |
| "loss": 0.3355, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09005071222782135, | |
| "step": 375, | |
| "valid_targets_mean": 5576.5, | |
| "valid_targets_min": 1602 | |
| }, | |
| { | |
| "epoch": 2.4224, | |
| "grad_norm": 0.23066395747891116, | |
| "learning_rate": 2.467323405981841e-05, | |
| "loss": 0.3242, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08278749883174896, | |
| "step": 380, | |
| "valid_targets_mean": 5321.2, | |
| "valid_targets_min": 2324 | |
| }, | |
| { | |
| "epoch": 2.4544, | |
| "grad_norm": 0.2193267803280765, | |
| "learning_rate": 2.423944640462533e-05, | |
| "loss": 0.3271, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08832499384880066, | |
| "step": 385, | |
| "valid_targets_mean": 5073.8, | |
| "valid_targets_min": 1054 | |
| }, | |
| { | |
| "epoch": 2.4864, | |
| "grad_norm": 0.21806083908641838, | |
| "learning_rate": 2.3803560188490968e-05, | |
| "loss": 0.3303, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08088940382003784, | |
| "step": 390, | |
| "valid_targets_mean": 6070.9, | |
| "valid_targets_min": 3083 | |
| }, | |
| { | |
| "epoch": 2.5183999999999997, | |
| "grad_norm": 0.22164532444220172, | |
| "learning_rate": 2.336579117868789e-05, | |
| "loss": 0.335, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09069826453924179, | |
| "step": 395, | |
| "valid_targets_mean": 5716.8, | |
| "valid_targets_min": 1389 | |
| }, | |
| { | |
| "epoch": 2.5504, | |
| "grad_norm": 0.23682560385891782, | |
| "learning_rate": 2.292635607448711e-05, | |
| "loss": 0.325, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08261828124523163, | |
| "step": 400, | |
| "valid_targets_mean": 5917.8, | |
| "valid_targets_min": 1918 | |
| }, | |
| { | |
| "epoch": 2.5824, | |
| "grad_norm": 0.24346477170249317, | |
| "learning_rate": 2.248547239989008e-05, | |
| "loss": 0.3371, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08386968076229095, | |
| "step": 405, | |
| "valid_targets_mean": 5211.4, | |
| "valid_targets_min": 3124 | |
| }, | |
| { | |
| "epoch": 2.6144, | |
| "grad_norm": 0.19472948812532634, | |
| "learning_rate": 2.204335839595255e-05, | |
| "loss": 0.3188, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07493388652801514, | |
| "step": 410, | |
| "valid_targets_mean": 5276.1, | |
| "valid_targets_min": 2549 | |
| }, | |
| { | |
| "epoch": 2.6464, | |
| "grad_norm": 0.22442481140650106, | |
| "learning_rate": 2.1600232912753452e-05, | |
| "loss": 0.3321, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07857003808021545, | |
| "step": 415, | |
| "valid_targets_mean": 5059.2, | |
| "valid_targets_min": 1478 | |
| }, | |
| { | |
| "epoch": 2.6784, | |
| "grad_norm": 0.21770822481824514, | |
| "learning_rate": 2.1156315301062293e-05, | |
| "loss": 0.3325, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08654189109802246, | |
| "step": 420, | |
| "valid_targets_mean": 6176.6, | |
| "valid_targets_min": 3999 | |
| }, | |
| { | |
| "epoch": 2.7104, | |
| "grad_norm": 0.2303772265208128, | |
| "learning_rate": 2.0711825303758712e-05, | |
| "loss": 0.3315, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07916657626628876, | |
| "step": 425, | |
| "valid_targets_mean": 4857.8, | |
| "valid_targets_min": 1871 | |
| }, | |
| { | |
| "epoch": 2.7424, | |
| "grad_norm": 0.2362702636474169, | |
| "learning_rate": 2.0266982947057962e-05, | |
| "loss": 0.3331, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08266505599021912, | |
| "step": 430, | |
| "valid_targets_mean": 5273.8, | |
| "valid_targets_min": 571 | |
| }, | |
| { | |
| "epoch": 2.7744, | |
| "grad_norm": 0.23238610336716467, | |
| "learning_rate": 1.9822008431596083e-05, | |
| "loss": 0.3318, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0778215304017067, | |
| "step": 435, | |
| "valid_targets_mean": 5079.6, | |
| "valid_targets_min": 1018 | |
| }, | |
| { | |
| "epoch": 2.8064, | |
| "grad_norm": 0.22745238988383099, | |
| "learning_rate": 1.937712202342881e-05, | |
| "loss": 0.3324, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08198985457420349, | |
| "step": 440, | |
| "valid_targets_mean": 5034.9, | |
| "valid_targets_min": 2190 | |
| }, | |
| { | |
| "epoch": 2.8384, | |
| "grad_norm": 0.22031663400522064, | |
| "learning_rate": 1.8932543944998037e-05, | |
| "loss": 0.3362, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09263623505830765, | |
| "step": 445, | |
| "valid_targets_mean": 5411.8, | |
| "valid_targets_min": 4103 | |
| }, | |
| { | |
| "epoch": 2.8704, | |
| "grad_norm": 0.23446135635447007, | |
| "learning_rate": 1.8488494266119877e-05, | |
| "loss": 0.3245, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08320371061563492, | |
| "step": 450, | |
| "valid_targets_mean": 5275.1, | |
| "valid_targets_min": 3131 | |
| }, | |
| { | |
| "epoch": 2.9024, | |
| "grad_norm": 0.2234904380138594, | |
| "learning_rate": 1.804519279504834e-05, | |
| "loss": 0.3251, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07954467833042145, | |
| "step": 455, | |
| "valid_targets_mean": 4903.2, | |
| "valid_targets_min": 1309 | |
| }, | |
| { | |
| "epoch": 2.9344, | |
| "grad_norm": 0.2274787643306054, | |
| "learning_rate": 1.7602858969668365e-05, | |
| "loss": 0.3294, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08588364720344543, | |
| "step": 460, | |
| "valid_targets_mean": 6065.0, | |
| "valid_targets_min": 4174 | |
| }, | |
| { | |
| "epoch": 2.9664, | |
| "grad_norm": 0.21541040235652797, | |
| "learning_rate": 1.716171174887231e-05, | |
| "loss": 0.3223, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08300478011369705, | |
| "step": 465, | |
| "valid_targets_mean": 5221.2, | |
| "valid_targets_min": 2466 | |
| }, | |
| { | |
| "epoch": 2.9984, | |
| "grad_norm": 0.20577577268402722, | |
| "learning_rate": 1.6721969504173484e-05, | |
| "loss": 0.328, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08499614894390106, | |
| "step": 470, | |
| "valid_targets_mean": 6225.9, | |
| "valid_targets_min": 3937 | |
| }, | |
| { | |
| "epoch": 3.0256, | |
| "grad_norm": 0.21751044527982824, | |
| "learning_rate": 1.628384991161041e-05, | |
| "loss": 0.3194, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08121082186698914, | |
| "step": 475, | |
| "valid_targets_mean": 6044.2, | |
| "valid_targets_min": 4658 | |
| }, | |
| { | |
| "epoch": 3.0576, | |
| "grad_norm": 0.21018229320526427, | |
| "learning_rate": 1.5847569843995452e-05, | |
| "loss": 0.3234, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07966670393943787, | |
| "step": 480, | |
| "valid_targets_mean": 5516.9, | |
| "valid_targets_min": 3439 | |
| }, | |
| { | |
| "epoch": 3.0896, | |
| "grad_norm": 0.21179087254975096, | |
| "learning_rate": 1.5413345263560922e-05, | |
| "loss": 0.3269, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08021973818540573, | |
| "step": 485, | |
| "valid_targets_mean": 5511.8, | |
| "valid_targets_min": 625 | |
| }, | |
| { | |
| "epoch": 3.1216, | |
| "grad_norm": 0.21533786371733335, | |
| "learning_rate": 1.4981391115056032e-05, | |
| "loss": 0.3151, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08071337640285492, | |
| "step": 490, | |
| "valid_targets_mean": 6069.5, | |
| "valid_targets_min": 4143 | |
| }, | |
| { | |
| "epoch": 3.1536, | |
| "grad_norm": 0.22340718433208287, | |
| "learning_rate": 1.455192121934748e-05, | |
| "loss": 0.3138, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07231708616018295, | |
| "step": 495, | |
| "valid_targets_mean": 5085.9, | |
| "valid_targets_min": 813 | |
| }, | |
| { | |
| "epoch": 3.1856, | |
| "grad_norm": 0.23470120256913535, | |
| "learning_rate": 1.4125148167576303e-05, | |
| "loss": 0.3316, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08944922685623169, | |
| "step": 500, | |
| "valid_targets_mean": 5925.7, | |
| "valid_targets_min": 4258 | |
| }, | |
| { | |
| "epoch": 3.2176, | |
| "grad_norm": 0.2225970343777138, | |
| "learning_rate": 1.3701283215923563e-05, | |
| "loss": 0.3198, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07638940215110779, | |
| "step": 505, | |
| "valid_targets_mean": 5141.3, | |
| "valid_targets_min": 1120 | |
| }, | |
| { | |
| "epoch": 3.2496, | |
| "grad_norm": 0.22812978410814985, | |
| "learning_rate": 1.328053618103677e-05, | |
| "loss": 0.3283, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08584798872470856, | |
| "step": 510, | |
| "valid_targets_mean": 5091.2, | |
| "valid_targets_min": 1999 | |
| }, | |
| { | |
| "epoch": 3.2816, | |
| "grad_norm": 0.2108055801479295, | |
| "learning_rate": 1.2863115336168916e-05, | |
| "loss": 0.3257, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07783376425504684, | |
| "step": 515, | |
| "valid_targets_mean": 5384.1, | |
| "valid_targets_min": 935 | |
| }, | |
| { | |
| "epoch": 3.3136, | |
| "grad_norm": 0.27065537006118323, | |
| "learning_rate": 1.2449227308081509e-05, | |
| "loss": 0.3207, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07168179750442505, | |
| "step": 520, | |
| "valid_targets_mean": 5510.8, | |
| "valid_targets_min": 686 | |
| }, | |
| { | |
| "epoch": 3.3456, | |
| "grad_norm": 0.22197248150979024, | |
| "learning_rate": 1.2039076974762587e-05, | |
| "loss": 0.3254, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08753179013729095, | |
| "step": 525, | |
| "valid_targets_mean": 5997.3, | |
| "valid_targets_min": 4451 | |
| }, | |
| { | |
| "epoch": 3.3776, | |
| "grad_norm": 0.21204994803562507, | |
| "learning_rate": 1.163286736401044e-05, | |
| "loss": 0.3295, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07973788678646088, | |
| "step": 530, | |
| "valid_targets_mean": 5424.2, | |
| "valid_targets_min": 2094 | |
| }, | |
| { | |
| "epoch": 3.4096, | |
| "grad_norm": 0.20565663141003207, | |
| "learning_rate": 1.123079955293322e-05, | |
| "loss": 0.3268, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07733616232872009, | |
| "step": 535, | |
| "valid_targets_mean": 5321.2, | |
| "valid_targets_min": 1101 | |
| }, | |
| { | |
| "epoch": 3.4416, | |
| "grad_norm": 0.21601342130688855, | |
| "learning_rate": 1.0833072568414037e-05, | |
| "loss": 0.3238, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08604863286018372, | |
| "step": 540, | |
| "valid_targets_mean": 5389.1, | |
| "valid_targets_min": 2707 | |
| }, | |
| { | |
| "epoch": 3.4736000000000002, | |
| "grad_norm": 0.2266250484856398, | |
| "learning_rate": 1.0439883288591057e-05, | |
| "loss": 0.3234, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08474144339561462, | |
| "step": 545, | |
| "valid_targets_mean": 5556.5, | |
| "valid_targets_min": 3476 | |
| }, | |
| { | |
| "epoch": 3.5056000000000003, | |
| "grad_norm": 0.21000231515568052, | |
| "learning_rate": 1.0051426345401202e-05, | |
| "loss": 0.3226, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07983237504959106, | |
| "step": 550, | |
| "valid_targets_mean": 5647.8, | |
| "valid_targets_min": 1239 | |
| }, | |
| { | |
| "epoch": 3.5376, | |
| "grad_norm": 0.21908337779577544, | |
| "learning_rate": 9.667894028235704e-06, | |
| "loss": 0.328, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07918648421764374, | |
| "step": 555, | |
| "valid_targets_mean": 5250.9, | |
| "valid_targets_min": 2989 | |
| }, | |
| { | |
| "epoch": 3.5696, | |
| "grad_norm": 0.20775201883443636, | |
| "learning_rate": 9.289476188755315e-06, | |
| "loss": 0.3207, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08295097947120667, | |
| "step": 560, | |
| "valid_targets_mean": 6207.6, | |
| "valid_targets_min": 4217 | |
| }, | |
| { | |
| "epoch": 3.6016, | |
| "grad_norm": 0.21294278678462036, | |
| "learning_rate": 8.916360146912122e-06, | |
| "loss": 0.3194, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07971015572547913, | |
| "step": 565, | |
| "valid_targets_mean": 5787.9, | |
| "valid_targets_min": 3808 | |
| }, | |
| { | |
| "epoch": 3.6336, | |
| "grad_norm": 0.22408542647313262, | |
| "learning_rate": 8.548730598224646e-06, | |
| "loss": 0.3242, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08619304746389389, | |
| "step": 570, | |
| "valid_targets_mean": 5396.3, | |
| "valid_targets_min": 3498 | |
| }, | |
| { | |
| "epoch": 3.6656, | |
| "grad_norm": 0.2289853339804503, | |
| "learning_rate": 8.186769522352053e-06, | |
| "loss": 0.3204, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08720386028289795, | |
| "step": 575, | |
| "valid_targets_mean": 5984.4, | |
| "valid_targets_min": 4048 | |
| }, | |
| { | |
| "epoch": 3.6976, | |
| "grad_norm": 0.21073530692466028, | |
| "learning_rate": 7.830656093012714e-06, | |
| "loss": 0.3286, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08480887115001678, | |
| "step": 580, | |
| "valid_targets_mean": 5643.3, | |
| "valid_targets_min": 2574 | |
| }, | |
| { | |
| "epoch": 3.7296, | |
| "grad_norm": 0.22372831454903153, | |
| "learning_rate": 7.480566589291696e-06, | |
| "loss": 0.3271, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0835227221250534, | |
| "step": 585, | |
| "valid_targets_mean": 5632.9, | |
| "valid_targets_min": 3630 | |
| }, | |
| { | |
| "epoch": 3.7616, | |
| "grad_norm": 0.19869385568005432, | |
| "learning_rate": 7.1366743083812285e-06, | |
| "loss": 0.3139, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07951802760362625, | |
| "step": 590, | |
| "valid_targets_mean": 5470.8, | |
| "valid_targets_min": 3152 | |
| }, | |
| { | |
| "epoch": 3.7936, | |
| "grad_norm": 0.20574173001103932, | |
| "learning_rate": 6.799149479797101e-06, | |
| "loss": 0.3233, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0819997638463974, | |
| "step": 595, | |
| "valid_targets_mean": 5568.1, | |
| "valid_targets_min": 4110 | |
| }, | |
| { | |
| "epoch": 3.8256, | |
| "grad_norm": 0.2152111168894627, | |
| "learning_rate": 6.4681591811137e-06, | |
| "loss": 0.3264, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09062743186950684, | |
| "step": 600, | |
| "valid_targets_mean": 5654.9, | |
| "valid_targets_min": 2936 | |
| }, | |
| { | |
| "epoch": 3.8576, | |
| "grad_norm": 0.2053653642091841, | |
| "learning_rate": 6.143867255259197e-06, | |
| "loss": 0.3286, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08145453035831451, | |
| "step": 605, | |
| "valid_targets_mean": 5277.7, | |
| "valid_targets_min": 958 | |
| }, | |
| { | |
| "epoch": 3.8895999999999997, | |
| "grad_norm": 0.19618687651688638, | |
| "learning_rate": 5.8264342294119504e-06, | |
| "loss": 0.3307, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07444492727518082, | |
| "step": 610, | |
| "valid_targets_mean": 5692.4, | |
| "valid_targets_min": 1054 | |
| }, | |
| { | |
| "epoch": 3.9215999999999998, | |
| "grad_norm": 0.4633944919440031, | |
| "learning_rate": 5.516017235538258e-06, | |
| "loss": 0.3218, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07183433324098587, | |
| "step": 615, | |
| "valid_targets_mean": 6619.4, | |
| "valid_targets_min": 4473 | |
| }, | |
| { | |
| "epoch": 3.9536, | |
| "grad_norm": 0.19886084345698227, | |
| "learning_rate": 5.212769932610695e-06, | |
| "loss": 0.3221, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0788281261920929, | |
| "step": 620, | |
| "valid_targets_mean": 5528.3, | |
| "valid_targets_min": 3187 | |
| }, | |
| { | |
| "epoch": 3.9856, | |
| "grad_norm": 0.208092162317957, | |
| "learning_rate": 4.916842430545681e-06, | |
| "loss": 0.3307, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08409751206636429, | |
| "step": 625, | |
| "valid_targets_mean": 6215.4, | |
| "valid_targets_min": 3757 | |
| }, | |
| { | |
| "epoch": 4.0128, | |
| "grad_norm": 0.21479215026797172, | |
| "learning_rate": 4.628381215897837e-06, | |
| "loss": 0.3245, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08276808261871338, | |
| "step": 630, | |
| "valid_targets_mean": 4926.9, | |
| "valid_targets_min": 2898 | |
| }, | |
| { | |
| "epoch": 4.0448, | |
| "grad_norm": 0.20550296016432856, | |
| "learning_rate": 4.347529079347914e-06, | |
| "loss": 0.3226, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07196902483701706, | |
| "step": 635, | |
| "valid_targets_mean": 5715.8, | |
| "valid_targets_min": 710 | |
| }, | |
| { | |
| "epoch": 4.0768, | |
| "grad_norm": 0.19841931672206817, | |
| "learning_rate": 4.074425045020247e-06, | |
| "loss": 0.3055, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07841131091117859, | |
| "step": 640, | |
| "valid_targets_mean": 5529.1, | |
| "valid_targets_min": 881 | |
| }, | |
| { | |
| "epoch": 4.1088, | |
| "grad_norm": 0.20632895716592087, | |
| "learning_rate": 3.8092043016646487e-06, | |
| "loss": 0.3238, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08663403987884521, | |
| "step": 645, | |
| "valid_targets_mean": 5540.6, | |
| "valid_targets_min": 646 | |
| }, | |
| { | |
| "epoch": 4.1408, | |
| "grad_norm": 0.19479668863200172, | |
| "learning_rate": 3.551998135736867e-06, | |
| "loss": 0.3247, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08184544742107391, | |
| "step": 650, | |
| "valid_targets_mean": 5659.1, | |
| "valid_targets_min": 3048 | |
| }, | |
| { | |
| "epoch": 4.1728, | |
| "grad_norm": 0.21722025161657196, | |
| "learning_rate": 3.3029338664107267e-06, | |
| "loss": 0.3205, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08144575357437134, | |
| "step": 655, | |
| "valid_targets_mean": 5178.1, | |
| "valid_targets_min": 571 | |
| }, | |
| { | |
| "epoch": 4.2048, | |
| "grad_norm": 0.20576725177526284, | |
| "learning_rate": 3.0621347825540625e-06, | |
| "loss": 0.3249, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07952291518449783, | |
| "step": 660, | |
| "valid_targets_mean": 5043.1, | |
| "valid_targets_min": 1090 | |
| }, | |
| { | |
| "epoch": 4.2368, | |
| "grad_norm": 0.2137647103914308, | |
| "learning_rate": 2.8297200816997183e-06, | |
| "loss": 0.3243, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08988995850086212, | |
| "step": 665, | |
| "valid_targets_mean": 6148.6, | |
| "valid_targets_min": 4401 | |
| }, | |
| { | |
| "epoch": 4.2688, | |
| "grad_norm": 0.20096638447003157, | |
| "learning_rate": 2.605804811041803e-06, | |
| "loss": 0.3129, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0822472795844078, | |
| "step": 670, | |
| "valid_targets_mean": 5288.3, | |
| "valid_targets_min": 3220 | |
| }, | |
| { | |
| "epoch": 4.3008, | |
| "grad_norm": 0.20183040579550365, | |
| "learning_rate": 2.390499810486351e-06, | |
| "loss": 0.3338, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08840819448232651, | |
| "step": 675, | |
| "valid_targets_mean": 5098.6, | |
| "valid_targets_min": 3391 | |
| }, | |
| { | |
| "epoch": 4.3328, | |
| "grad_norm": 0.19426925166522613, | |
| "learning_rate": 2.183911657784685e-06, | |
| "loss": 0.3189, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08456306159496307, | |
| "step": 680, | |
| "valid_targets_mean": 5405.1, | |
| "valid_targets_min": 3206 | |
| }, | |
| { | |
| "epoch": 4.3648, | |
| "grad_norm": 0.20116698442114167, | |
| "learning_rate": 1.986142615776532e-06, | |
| "loss": 0.3304, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07993205636739731, | |
| "step": 685, | |
| "valid_targets_mean": 5235.6, | |
| "valid_targets_min": 1167 | |
| }, | |
| { | |
| "epoch": 4.3968, | |
| "grad_norm": 0.20291008340159364, | |
| "learning_rate": 1.7972905817690644e-06, | |
| "loss": 0.3178, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08226853609085083, | |
| "step": 690, | |
| "valid_targets_mean": 5901.6, | |
| "valid_targets_min": 4097 | |
| }, | |
| { | |
| "epoch": 4.4288, | |
| "grad_norm": 0.1970309784297005, | |
| "learning_rate": 1.617449039076955e-06, | |
| "loss": 0.3205, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0889642983675003, | |
| "step": 695, | |
| "valid_targets_mean": 6463.2, | |
| "valid_targets_min": 3151 | |
| }, | |
| { | |
| "epoch": 4.4608, | |
| "grad_norm": 0.21957818241096985, | |
| "learning_rate": 1.4467070107473413e-06, | |
| "loss": 0.3102, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08201165497303009, | |
| "step": 700, | |
| "valid_targets_mean": 5256.1, | |
| "valid_targets_min": 1931 | |
| }, | |
| { | |
| "epoch": 4.4928, | |
| "grad_norm": 0.20381806326480958, | |
| "learning_rate": 1.2851490154926816e-06, | |
| "loss": 0.3217, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08283894509077072, | |
| "step": 705, | |
| "valid_targets_mean": 5474.8, | |
| "valid_targets_min": 3711 | |
| }, | |
| { | |
| "epoch": 4.5248, | |
| "grad_norm": 0.20007664300882147, | |
| "learning_rate": 1.1328550258533211e-06, | |
| "loss": 0.3195, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08363893628120422, | |
| "step": 710, | |
| "valid_targets_mean": 5336.9, | |
| "valid_targets_min": 4024 | |
| }, | |
| { | |
| "epoch": 4.5568, | |
| "grad_norm": 0.2027444945770559, | |
| "learning_rate": 9.899004286103953e-07, | |
| "loss": 0.3145, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08135296404361725, | |
| "step": 715, | |
| "valid_targets_mean": 5425.2, | |
| "valid_targets_min": 3646 | |
| }, | |
| { | |
| "epoch": 4.5888, | |
| "grad_norm": 0.18936965807900735, | |
| "learning_rate": 8.5635598746876e-07, | |
| "loss": 0.3156, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07710923254489899, | |
| "step": 720, | |
| "valid_targets_mean": 6018.6, | |
| "valid_targets_min": 4053 | |
| }, | |
| { | |
| "epoch": 4.6208, | |
| "grad_norm": 0.1847915315828253, | |
| "learning_rate": 7.32287808028389e-07, | |
| "loss": 0.3265, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07985624670982361, | |
| "step": 725, | |
| "valid_targets_mean": 6699.7, | |
| "valid_targets_min": 2723 | |
| }, | |
| { | |
| "epoch": 4.6528, | |
| "grad_norm": 0.2099701962926105, | |
| "learning_rate": 6.177573050615327e-07, | |
| "loss": 0.3154, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08430498838424683, | |
| "step": 730, | |
| "valid_targets_mean": 5119.5, | |
| "valid_targets_min": 3867 | |
| }, | |
| { | |
| "epoch": 4.6848, | |
| "grad_norm": 0.18915002670306677, | |
| "learning_rate": 5.128211721119213e-07, | |
| "loss": 0.3188, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0837610512971878, | |
| "step": 735, | |
| "valid_targets_mean": 6211.8, | |
| "valid_targets_min": 4771 | |
| }, | |
| { | |
| "epoch": 4.7168, | |
| "grad_norm": 0.21175210312067896, | |
| "learning_rate": 4.175313534309755e-07, | |
| "loss": 0.3217, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08258293569087982, | |
| "step": 740, | |
| "valid_targets_mean": 5606.8, | |
| "valid_targets_min": 4067 | |
| }, | |
| { | |
| "epoch": 4.7488, | |
| "grad_norm": 0.19171064939070626, | |
| "learning_rate": 3.319350182649861e-07, | |
| "loss": 0.3207, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08525727689266205, | |
| "step": 745, | |
| "valid_targets_mean": 6320.4, | |
| "valid_targets_min": 3489 | |
| }, | |
| { | |
| "epoch": 4.7808, | |
| "grad_norm": 0.20140414971721998, | |
| "learning_rate": 2.560745375059392e-07, | |
| "loss": 0.3179, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07746146619319916, | |
| "step": 750, | |
| "valid_targets_mean": 5373.7, | |
| "valid_targets_min": 3737 | |
| }, | |
| { | |
| "epoch": 4.8128, | |
| "grad_norm": 0.19725191792852176, | |
| "learning_rate": 1.8998746271758016e-07, | |
| "loss": 0.3231, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07618357986211777, | |
| "step": 755, | |
| "valid_targets_mean": 5033.5, | |
| "valid_targets_min": 1995 | |
| }, | |
| { | |
| "epoch": 4.8448, | |
| "grad_norm": 0.18764613291156892, | |
| "learning_rate": 1.337065075470778e-07, | |
| "loss": 0.3189, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0758911669254303, | |
| "step": 760, | |
| "valid_targets_mean": 5730.7, | |
| "valid_targets_min": 3598 | |
| }, | |
| { | |
| "epoch": 4.8768, | |
| "grad_norm": 0.18701696026878642, | |
| "learning_rate": 8.725953153150279e-08, | |
| "loss": 0.3149, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07627741992473602, | |
| "step": 765, | |
| "valid_targets_mean": 5283.1, | |
| "valid_targets_min": 389 | |
| }, | |
| { | |
| "epoch": 4.9088, | |
| "grad_norm": 0.20233391129510425, | |
| "learning_rate": 5.066952630711886e-08, | |
| "loss": 0.3263, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08208300173282623, | |
| "step": 770, | |
| "valid_targets_mean": 5043.8, | |
| "valid_targets_min": 645 | |
| }, | |
| { | |
| "epoch": 4.9408, | |
| "grad_norm": 0.19082641034730213, | |
| "learning_rate": 2.3954604228342283e-08, | |
| "loss": 0.3236, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07420461624860764, | |
| "step": 775, | |
| "valid_targets_mean": 5601.7, | |
| "valid_targets_min": 4300 | |
| }, | |
| { | |
| "epoch": 4.9728, | |
| "grad_norm": 0.19359894736289907, | |
| "learning_rate": 7.12798940197601e-09, | |
| "loss": 0.3208, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08184170722961426, | |
| "step": 780, | |
| "valid_targets_mean": 5441.5, | |
| "valid_targets_min": 1112 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.4056907192862938, | |
| "learning_rate": 1.9801114115480802e-10, | |
| "loss": 0.3226, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3350988030433655, | |
| "step": 785, | |
| "valid_targets_mean": 5269.4, | |
| "valid_targets_min": 3557 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3350988030433655, | |
| "step": 785, | |
| "total_flos": 2.1121566765544899e+18, | |
| "train_loss": 0.35298769489215437, | |
| "train_runtime": 22279.9871, | |
| "train_samples_per_second": 2.242, | |
| "train_steps_per_second": 0.035, | |
| "valid_targets_mean": 5269.4, | |
| "valid_targets_min": 3557 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 785, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.1121566765544899e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |