{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 785, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.032, "grad_norm": 7.992597780897032, "learning_rate": 2.0253164556962026e-06, "loss": 0.7346, "loss_nan_ranks": 0, "loss_rank_avg": 0.1965068280696869, "step": 5, "valid_targets_mean": 5113.5, "valid_targets_min": 3169 }, { "epoch": 0.064, "grad_norm": 4.905164971122138, "learning_rate": 4.556962025316456e-06, "loss": 0.7062, "loss_nan_ranks": 0, "loss_rank_avg": 0.16786593198776245, "step": 10, "valid_targets_mean": 5742.6, "valid_targets_min": 4001 }, { "epoch": 0.096, "grad_norm": 2.4265542623454994, "learning_rate": 7.08860759493671e-06, "loss": 0.6469, "loss_nan_ranks": 0, "loss_rank_avg": 0.1609123945236206, "step": 15, "valid_targets_mean": 5070.8, "valid_targets_min": 1187 }, { "epoch": 0.128, "grad_norm": 0.9695481834207988, "learning_rate": 9.620253164556963e-06, "loss": 0.5955, "loss_nan_ranks": 0, "loss_rank_avg": 0.152688130736351, "step": 20, "valid_targets_mean": 5043.4, "valid_targets_min": 3778 }, { "epoch": 0.16, "grad_norm": 0.8556904894549953, "learning_rate": 1.2151898734177216e-05, "loss": 0.5631, "loss_nan_ranks": 0, "loss_rank_avg": 0.1266036331653595, "step": 25, "valid_targets_mean": 5028.5, "valid_targets_min": 3598 }, { "epoch": 0.192, "grad_norm": 0.5406877764182748, "learning_rate": 1.468354430379747e-05, "loss": 0.5366, "loss_nan_ranks": 0, "loss_rank_avg": 0.12929826974868774, "step": 30, "valid_targets_mean": 5412.2, "valid_targets_min": 3452 }, { "epoch": 0.224, "grad_norm": 0.4491232079388601, "learning_rate": 1.7215189873417723e-05, "loss": 0.5279, "loss_nan_ranks": 0, "loss_rank_avg": 0.12588968873023987, "step": 35, "valid_targets_mean": 5830.6, "valid_targets_min": 1236 }, { "epoch": 0.256, "grad_norm": 0.3734935489185054, "learning_rate": 1.974683544303798e-05, "loss": 0.4878, "loss_nan_ranks": 0, "loss_rank_avg": 0.1219080314040184, "step": 40, "valid_targets_mean": 5462.8, "valid_targets_min": 3876 }, { "epoch": 0.288, "grad_norm": 0.3243375480472286, "learning_rate": 2.2278481012658228e-05, "loss": 0.4516, "loss_nan_ranks": 0, "loss_rank_avg": 0.11703348159790039, "step": 45, "valid_targets_mean": 5427.9, "valid_targets_min": 3616 }, { "epoch": 0.32, "grad_norm": 0.2882643420638927, "learning_rate": 2.481012658227848e-05, "loss": 0.4482, "loss_nan_ranks": 0, "loss_rank_avg": 0.10204213112592697, "step": 50, "valid_targets_mean": 5454.8, "valid_targets_min": 1708 }, { "epoch": 0.352, "grad_norm": 0.2769504438340826, "learning_rate": 2.7341772151898737e-05, "loss": 0.4342, "loss_nan_ranks": 0, "loss_rank_avg": 0.11272697150707245, "step": 55, "valid_targets_mean": 5620.4, "valid_targets_min": 1322 }, { "epoch": 0.384, "grad_norm": 0.2620427633098365, "learning_rate": 2.987341772151899e-05, "loss": 0.4211, "loss_nan_ranks": 0, "loss_rank_avg": 0.10937047749757767, "step": 60, "valid_targets_mean": 5090.8, "valid_targets_min": 3842 }, { "epoch": 0.416, "grad_norm": 0.2463435677646114, "learning_rate": 3.240506329113924e-05, "loss": 0.4135, "loss_nan_ranks": 0, "loss_rank_avg": 0.10371287912130356, "step": 65, "valid_targets_mean": 5944.2, "valid_targets_min": 3980 }, { "epoch": 0.448, "grad_norm": 0.2439380748245345, "learning_rate": 3.49367088607595e-05, "loss": 0.4112, "loss_nan_ranks": 0, "loss_rank_avg": 0.11104129254817963, "step": 70, "valid_targets_mean": 6091.2, "valid_targets_min": 4103 }, { "epoch": 0.48, "grad_norm": 0.2592892650393887, "learning_rate": 3.746835443037975e-05, "loss": 0.392, "loss_nan_ranks": 0, "loss_rank_avg": 0.09229408204555511, "step": 75, "valid_targets_mean": 5581.8, "valid_targets_min": 3931 }, { "epoch": 0.512, "grad_norm": 0.27965263535503615, "learning_rate": 4e-05, "loss": 0.3945, "loss_nan_ranks": 0, "loss_rank_avg": 0.09553131461143494, "step": 80, "valid_targets_mean": 5852.2, "valid_targets_min": 3736 }, { "epoch": 0.544, "grad_norm": 0.23703955142453972, "learning_rate": 3.999504991751045e-05, "loss": 0.3795, "loss_nan_ranks": 0, "loss_rank_avg": 0.09883347153663635, "step": 85, "valid_targets_mean": 6116.4, "valid_targets_min": 1188 }, { "epoch": 0.576, "grad_norm": 0.27006174007491235, "learning_rate": 3.9980202120373464e-05, "loss": 0.392, "loss_nan_ranks": 0, "loss_rank_avg": 0.09063629806041718, "step": 90, "valid_targets_mean": 4680.5, "valid_targets_min": 638 }, { "epoch": 0.608, "grad_norm": 0.26947357129102495, "learning_rate": 3.995546395837111e-05, "loss": 0.3846, "loss_nan_ranks": 0, "loss_rank_avg": 0.1091674342751503, "step": 95, "valid_targets_mean": 6072.8, "valid_targets_min": 3603 }, { "epoch": 0.64, "grad_norm": 0.2927613874979733, "learning_rate": 3.992084767709763e-05, "loss": 0.3735, "loss_nan_ranks": 0, "loss_rank_avg": 0.09993791580200195, "step": 100, "valid_targets_mean": 5355.8, "valid_targets_min": 826 }, { "epoch": 0.672, "grad_norm": 0.24121791608110918, "learning_rate": 3.987637041189781e-05, "loss": 0.3845, "loss_nan_ranks": 0, "loss_rank_avg": 0.09499676525592804, "step": 105, "valid_targets_mean": 6090.1, "valid_targets_min": 3190 }, { "epoch": 0.704, "grad_norm": 0.24568965637675186, "learning_rate": 3.982205417938482e-05, "loss": 0.3783, "loss_nan_ranks": 0, "loss_rank_avg": 0.09854920208454132, "step": 110, "valid_targets_mean": 5208.7, "valid_targets_min": 3716 }, { "epoch": 0.736, "grad_norm": 0.2440314012575419, "learning_rate": 3.975792586654179e-05, "loss": 0.3661, "loss_nan_ranks": 0, "loss_rank_avg": 0.08172737061977386, "step": 115, "valid_targets_mean": 5737.1, "valid_targets_min": 2032 }, { "epoch": 0.768, "grad_norm": 0.25094578582914895, "learning_rate": 3.968401721741259e-05, "loss": 0.3729, "loss_nan_ranks": 0, "loss_rank_avg": 0.08649879693984985, "step": 120, "valid_targets_mean": 4845.7, "valid_targets_min": 2490 }, { "epoch": 0.8, "grad_norm": 0.2454746912712756, "learning_rate": 3.960036481738819e-05, "loss": 0.3772, "loss_nan_ranks": 0, "loss_rank_avg": 0.08654075860977173, "step": 125, "valid_targets_mean": 5857.2, "valid_targets_min": 4130 }, { "epoch": 0.832, "grad_norm": 0.27286819412305224, "learning_rate": 3.950701007509667e-05, "loss": 0.3698, "loss_nan_ranks": 0, "loss_rank_avg": 0.0876830592751503, "step": 130, "valid_targets_mean": 6018.9, "valid_targets_min": 3598 }, { "epoch": 0.864, "grad_norm": 0.24040936227390755, "learning_rate": 3.940399920190552e-05, "loss": 0.3615, "loss_nan_ranks": 0, "loss_rank_avg": 0.09337051212787628, "step": 135, "valid_targets_mean": 5848.9, "valid_targets_min": 4036 }, { "epoch": 0.896, "grad_norm": 0.26092439770692516, "learning_rate": 3.92913831890467e-05, "loss": 0.3687, "loss_nan_ranks": 0, "loss_rank_avg": 0.0913911908864975, "step": 140, "valid_targets_mean": 5912.9, "valid_targets_min": 4229 }, { "epoch": 0.928, "grad_norm": 0.24371078285635853, "learning_rate": 3.916921778237556e-05, "loss": 0.3664, "loss_nan_ranks": 0, "loss_rank_avg": 0.08543594181537628, "step": 145, "valid_targets_mean": 5582.9, "valid_targets_min": 3696 }, { "epoch": 0.96, "grad_norm": 0.217048732240177, "learning_rate": 3.903756345477612e-05, "loss": 0.3586, "loss_nan_ranks": 0, "loss_rank_avg": 0.08604587614536285, "step": 150, "valid_targets_mean": 5474.2, "valid_targets_min": 3167 }, { "epoch": 0.992, "grad_norm": 0.26711655142264756, "learning_rate": 3.889648537622657e-05, "loss": 0.3612, "loss_nan_ranks": 0, "loss_rank_avg": 0.08616804331541061, "step": 155, "valid_targets_mean": 5076.6, "valid_targets_min": 709 }, { "epoch": 1.0192, "grad_norm": 0.24511649097845134, "learning_rate": 3.874605338153952e-05, "loss": 0.3586, "loss_nan_ranks": 0, "loss_rank_avg": 0.090129055082798, "step": 160, "valid_targets_mean": 5228.8, "valid_targets_min": 3914 }, { "epoch": 1.0512, "grad_norm": 0.25785229272339794, "learning_rate": 3.8586341935793265e-05, "loss": 0.3559, "loss_nan_ranks": 0, "loss_rank_avg": 0.07839885354042053, "step": 165, "valid_targets_mean": 4965.1, "valid_targets_min": 1130 }, { "epoch": 1.0832, "grad_norm": 0.26965616757629585, "learning_rate": 3.841743009747089e-05, "loss": 0.3532, "loss_nan_ranks": 0, "loss_rank_avg": 0.08936138451099396, "step": 170, "valid_targets_mean": 6028.9, "valid_targets_min": 3924 }, { "epoch": 1.1152, "grad_norm": 0.2253315208645885, "learning_rate": 3.8239401479325714e-05, "loss": 0.3526, "loss_nan_ranks": 0, "loss_rank_avg": 0.08879570662975311, "step": 175, "valid_targets_mean": 5262.8, "valid_targets_min": 3437 }, { "epoch": 1.1472, "grad_norm": 0.2225876874756375, "learning_rate": 3.8052344206992276e-05, "loss": 0.3571, "loss_nan_ranks": 0, "loss_rank_avg": 0.08561760932207108, "step": 180, "valid_targets_mean": 5392.8, "valid_targets_min": 3094 }, { "epoch": 1.1792, "grad_norm": 0.25198165027120606, "learning_rate": 3.7856350875363396e-05, "loss": 0.3588, "loss_nan_ranks": 0, "loss_rank_avg": 0.08976033329963684, "step": 185, "valid_targets_mean": 5986.1, "valid_targets_min": 671 }, { "epoch": 1.2112, "grad_norm": 0.2340713016794323, "learning_rate": 3.765151850275497e-05, "loss": 0.3493, "loss_nan_ranks": 0, "loss_rank_avg": 0.08710001409053802, "step": 190, "valid_targets_mean": 5964.0, "valid_targets_min": 3471 }, { "epoch": 1.2432, "grad_norm": 0.2342881184173051, "learning_rate": 3.7437948482881104e-05, "loss": 0.3452, "loss_nan_ranks": 0, "loss_rank_avg": 0.0860249400138855, "step": 195, "valid_targets_mean": 6094.3, "valid_targets_min": 3818 }, { "epoch": 1.2752, "grad_norm": 0.2525665937119514, "learning_rate": 3.721574653466336e-05, "loss": 0.3504, "loss_nan_ranks": 0, "loss_rank_avg": 0.08649658411741257, "step": 200, "valid_targets_mean": 6088.2, "valid_targets_min": 3539 }, { "epoch": 1.3072, "grad_norm": 0.2274695837603368, "learning_rate": 3.698502264989903e-05, "loss": 0.3452, "loss_nan_ranks": 0, "loss_rank_avg": 0.0824115127325058, "step": 205, "valid_targets_mean": 5638.9, "valid_targets_min": 4279 }, { "epoch": 1.3392, "grad_norm": 0.26782770813054557, "learning_rate": 3.674589103881432e-05, "loss": 0.3476, "loss_nan_ranks": 0, "loss_rank_avg": 0.09814299643039703, "step": 210, "valid_targets_mean": 5398.6, "valid_targets_min": 3777 }, { "epoch": 1.3712, "grad_norm": 0.25592916799225457, "learning_rate": 3.64984700735293e-05, "loss": 0.3524, "loss_nan_ranks": 0, "loss_rank_avg": 0.08779767900705338, "step": 215, "valid_targets_mean": 5396.3, "valid_targets_min": 4022 }, { "epoch": 1.4032, "grad_norm": 0.26084030673703285, "learning_rate": 3.624288222946273e-05, "loss": 0.3556, "loss_nan_ranks": 0, "loss_rank_avg": 0.08763320744037628, "step": 220, "valid_targets_mean": 5151.4, "valid_targets_min": 1167 }, { "epoch": 1.4352, "grad_norm": 0.2297953014020372, "learning_rate": 3.597925402470578e-05, "loss": 0.3466, "loss_nan_ranks": 0, "loss_rank_avg": 0.08031567931175232, "step": 225, "valid_targets_mean": 5586.9, "valid_targets_min": 3479 }, { "epoch": 1.4672, "grad_norm": 0.25603642117773706, "learning_rate": 3.570771595739445e-05, "loss": 0.3455, "loss_nan_ranks": 0, "loss_rank_avg": 0.09005199372768402, "step": 230, "valid_targets_mean": 5418.8, "valid_targets_min": 3343 }, { "epoch": 1.4992, "grad_norm": 0.2259912054719558, "learning_rate": 3.5428402441111964e-05, "loss": 0.3501, "loss_nan_ranks": 0, "loss_rank_avg": 0.08260194212198257, "step": 235, "valid_targets_mean": 5300.5, "valid_targets_min": 2971 }, { "epoch": 1.5312000000000001, "grad_norm": 0.29512010642162345, "learning_rate": 3.5141451738352936e-05, "loss": 0.3516, "loss_nan_ranks": 0, "loss_rank_avg": 0.08057820051908493, "step": 240, "valid_targets_mean": 4825.9, "valid_targets_min": 757 }, { "epoch": 1.5632000000000001, "grad_norm": 0.2357451414355105, "learning_rate": 3.4847005892082266e-05, "loss": 0.3447, "loss_nan_ranks": 0, "loss_rank_avg": 0.08529014140367508, "step": 245, "valid_targets_mean": 5788.1, "valid_targets_min": 292 }, { "epoch": 1.5952, "grad_norm": 0.243031110151129, "learning_rate": 3.454521065542273e-05, "loss": 0.3403, "loss_nan_ranks": 0, "loss_rank_avg": 0.08439987897872925, "step": 250, "valid_targets_mean": 5117.9, "valid_targets_min": 2601 }, { "epoch": 1.6272, "grad_norm": 0.2631373773545141, "learning_rate": 3.423621541950597e-05, "loss": 0.3406, "loss_nan_ranks": 0, "loss_rank_avg": 0.08441022783517838, "step": 255, "valid_targets_mean": 5009.3, "valid_targets_min": 1060 }, { "epoch": 1.6592, "grad_norm": 0.25653559181125485, "learning_rate": 3.3920173139522664e-05, "loss": 0.3475, "loss_nan_ranks": 0, "loss_rank_avg": 0.09525300562381744, "step": 260, "valid_targets_mean": 6077.0, "valid_targets_min": 4176 }, { "epoch": 1.6912, "grad_norm": 0.21963523463947673, "learning_rate": 3.35972402590084e-05, "loss": 0.3334, "loss_nan_ranks": 0, "loss_rank_avg": 0.08650698512792587, "step": 265, "valid_targets_mean": 6181.9, "valid_targets_min": 3567 }, { "epoch": 1.7231999999999998, "grad_norm": 0.2654405120685201, "learning_rate": 3.326757663240291e-05, "loss": 0.3428, "loss_nan_ranks": 0, "loss_rank_avg": 0.08159274607896805, "step": 270, "valid_targets_mean": 5128.4, "valid_targets_min": 1748 }, { "epoch": 1.7551999999999999, "grad_norm": 0.24111341689755342, "learning_rate": 3.293134544592073e-05, "loss": 0.3408, "loss_nan_ranks": 0, "loss_rank_avg": 0.08390471339225769, "step": 275, "valid_targets_mean": 5030.9, "valid_targets_min": 4017 }, { "epoch": 1.7872, "grad_norm": 0.2168712885276149, "learning_rate": 3.258871313677274e-05, "loss": 0.3408, "loss_nan_ranks": 0, "loss_rank_avg": 0.08097031712532043, "step": 280, "valid_targets_mean": 5817.1, "valid_targets_min": 1078 }, { "epoch": 1.8192, "grad_norm": 0.24477381681141186, "learning_rate": 3.2239849310778316e-05, "loss": 0.3442, "loss_nan_ranks": 0, "loss_rank_avg": 0.08444704860448837, "step": 285, "valid_targets_mean": 5124.5, "valid_targets_min": 2765 }, { "epoch": 1.8512, "grad_norm": 0.23685277446824146, "learning_rate": 3.188492665840909e-05, "loss": 0.3392, "loss_nan_ranks": 0, "loss_rank_avg": 0.0879988819360733, "step": 290, "valid_targets_mean": 5757.4, "valid_targets_min": 4257 }, { "epoch": 1.8832, "grad_norm": 0.24431534481839284, "learning_rate": 3.1524120869305726e-05, "loss": 0.3477, "loss_nan_ranks": 0, "loss_rank_avg": 0.0799805298447609, "step": 295, "valid_targets_mean": 5013.1, "valid_targets_min": 2408 }, { "epoch": 1.9152, "grad_norm": 0.23717088002345657, "learning_rate": 3.11576105453101e-05, "loss": 0.336, "loss_nan_ranks": 0, "loss_rank_avg": 0.08305468410253525, "step": 300, "valid_targets_mean": 5636.8, "valid_targets_min": 3806 }, { "epoch": 1.9472, "grad_norm": 0.23843928568213113, "learning_rate": 3.0785577112055916e-05, "loss": 0.3325, "loss_nan_ranks": 0, "loss_rank_avg": 0.08401763439178467, "step": 305, "valid_targets_mean": 5015.1, "valid_targets_min": 4040 }, { "epoch": 1.9792, "grad_norm": 0.22230671454556905, "learning_rate": 3.040820472916153e-05, "loss": 0.338, "loss_nan_ranks": 0, "loss_rank_avg": 0.08706588298082352, "step": 310, "valid_targets_mean": 5607.3, "valid_targets_min": 2762 }, { "epoch": 2.0064, "grad_norm": 0.24335412699803988, "learning_rate": 3.002568019906939e-05, "loss": 0.3354, "loss_nan_ranks": 0, "loss_rank_avg": 0.08025571703910828, "step": 315, "valid_targets_mean": 5478.8, "valid_targets_min": 3702 }, { "epoch": 2.0384, "grad_norm": 0.24330841804288073, "learning_rate": 2.963819287457733e-05, "loss": 0.3357, "loss_nan_ranks": 0, "loss_rank_avg": 0.07753533124923706, "step": 320, "valid_targets_mean": 6092.9, "valid_targets_min": 3663 }, { "epoch": 2.0704, "grad_norm": 0.25064283317993247, "learning_rate": 2.924593456510733e-05, "loss": 0.3356, "loss_nan_ranks": 0, "loss_rank_avg": 0.08604463934898376, "step": 325, "valid_targets_mean": 5441.4, "valid_targets_min": 3454 }, { "epoch": 2.1024, "grad_norm": 0.24013319984882203, "learning_rate": 2.8849099441758306e-05, "loss": 0.3402, "loss_nan_ranks": 0, "loss_rank_avg": 0.0855434238910675, "step": 330, "valid_targets_mean": 6197.1, "valid_targets_min": 4166 }, { "epoch": 2.1344, "grad_norm": 0.23527928586707625, "learning_rate": 2.844788394118979e-05, "loss": 0.3414, "loss_nan_ranks": 0, "loss_rank_avg": 0.07986783981323242, "step": 335, "valid_targets_mean": 4266.2, "valid_targets_min": 1054 }, { "epoch": 2.1664, "grad_norm": 0.2476030088095561, "learning_rate": 2.8042486668384164e-05, "loss": 0.3335, "loss_nan_ranks": 0, "loss_rank_avg": 0.08800746500492096, "step": 340, "valid_targets_mean": 5671.7, "valid_targets_min": 2845 }, { "epoch": 2.1984, "grad_norm": 0.2212675652031375, "learning_rate": 2.7633108298335582e-05, "loss": 0.3239, "loss_nan_ranks": 0, "loss_rank_avg": 0.0804094448685646, "step": 345, "valid_targets_mean": 6075.1, "valid_targets_min": 2695 }, { "epoch": 2.2304, "grad_norm": 0.25952169809602754, "learning_rate": 2.721995147671416e-05, "loss": 0.323, "loss_nan_ranks": 0, "loss_rank_avg": 0.09103801101446152, "step": 350, "valid_targets_mean": 5498.5, "valid_targets_min": 2659 }, { "epoch": 2.2624, "grad_norm": 0.2383291871019339, "learning_rate": 2.68032207195547e-05, "loss": 0.3403, "loss_nan_ranks": 0, "loss_rank_avg": 0.09590868651866913, "step": 355, "valid_targets_mean": 5983.8, "valid_targets_min": 3808 }, { "epoch": 2.2944, "grad_norm": 0.2442383332052773, "learning_rate": 2.6383122312019604e-05, "loss": 0.3331, "loss_nan_ranks": 0, "loss_rank_avg": 0.09253934025764465, "step": 360, "valid_targets_mean": 5522.2, "valid_targets_min": 4197 }, { "epoch": 2.3264, "grad_norm": 0.26697574295179705, "learning_rate": 2.595986420628597e-05, "loss": 0.3431, "loss_nan_ranks": 0, "loss_rank_avg": 0.08339850604534149, "step": 365, "valid_targets_mean": 5238.8, "valid_targets_min": 4166 }, { "epoch": 2.3584, "grad_norm": 0.22410653282363405, "learning_rate": 2.5533655918607573e-05, "loss": 0.3386, "loss_nan_ranks": 0, "loss_rank_avg": 0.07287449389696121, "step": 370, "valid_targets_mean": 4816.2, "valid_targets_min": 1188 }, { "epoch": 2.3904, "grad_norm": 0.24337444824543214, "learning_rate": 2.510470842560259e-05, "loss": 0.3355, "loss_nan_ranks": 0, "loss_rank_avg": 0.09005071222782135, "step": 375, "valid_targets_mean": 5576.5, "valid_targets_min": 1602 }, { "epoch": 2.4224, "grad_norm": 0.23066395747891116, "learning_rate": 2.467323405981841e-05, "loss": 0.3242, "loss_nan_ranks": 0, "loss_rank_avg": 0.08278749883174896, "step": 380, "valid_targets_mean": 5321.2, "valid_targets_min": 2324 }, { "epoch": 2.4544, "grad_norm": 0.2193267803280765, "learning_rate": 2.423944640462533e-05, "loss": 0.3271, "loss_nan_ranks": 0, "loss_rank_avg": 0.08832499384880066, "step": 385, "valid_targets_mean": 5073.8, "valid_targets_min": 1054 }, { "epoch": 2.4864, "grad_norm": 0.21806083908641838, "learning_rate": 2.3803560188490968e-05, "loss": 0.3303, "loss_nan_ranks": 0, "loss_rank_avg": 0.08088940382003784, "step": 390, "valid_targets_mean": 6070.9, "valid_targets_min": 3083 }, { "epoch": 2.5183999999999997, "grad_norm": 0.22164532444220172, "learning_rate": 2.336579117868789e-05, "loss": 0.335, "loss_nan_ranks": 0, "loss_rank_avg": 0.09069826453924179, "step": 395, "valid_targets_mean": 5716.8, "valid_targets_min": 1389 }, { "epoch": 2.5504, "grad_norm": 0.23682560385891782, "learning_rate": 2.292635607448711e-05, "loss": 0.325, "loss_nan_ranks": 0, "loss_rank_avg": 0.08261828124523163, "step": 400, "valid_targets_mean": 5917.8, "valid_targets_min": 1918 }, { "epoch": 2.5824, "grad_norm": 0.24346477170249317, "learning_rate": 2.248547239989008e-05, "loss": 0.3371, "loss_nan_ranks": 0, "loss_rank_avg": 0.08386968076229095, "step": 405, "valid_targets_mean": 5211.4, "valid_targets_min": 3124 }, { "epoch": 2.6144, "grad_norm": 0.19472948812532634, "learning_rate": 2.204335839595255e-05, "loss": 0.3188, "loss_nan_ranks": 0, "loss_rank_avg": 0.07493388652801514, "step": 410, "valid_targets_mean": 5276.1, "valid_targets_min": 2549 }, { "epoch": 2.6464, "grad_norm": 0.22442481140650106, "learning_rate": 2.1600232912753452e-05, "loss": 0.3321, "loss_nan_ranks": 0, "loss_rank_avg": 0.07857003808021545, "step": 415, "valid_targets_mean": 5059.2, "valid_targets_min": 1478 }, { "epoch": 2.6784, "grad_norm": 0.21770822481824514, "learning_rate": 2.1156315301062293e-05, "loss": 0.3325, "loss_nan_ranks": 0, "loss_rank_avg": 0.08654189109802246, "step": 420, "valid_targets_mean": 6176.6, "valid_targets_min": 3999 }, { "epoch": 2.7104, "grad_norm": 0.2303772265208128, "learning_rate": 2.0711825303758712e-05, "loss": 0.3315, "loss_nan_ranks": 0, "loss_rank_avg": 0.07916657626628876, "step": 425, "valid_targets_mean": 4857.8, "valid_targets_min": 1871 }, { "epoch": 2.7424, "grad_norm": 0.2362702636474169, "learning_rate": 2.0266982947057962e-05, "loss": 0.3331, "loss_nan_ranks": 0, "loss_rank_avg": 0.08266505599021912, "step": 430, "valid_targets_mean": 5273.8, "valid_targets_min": 571 }, { "epoch": 2.7744, "grad_norm": 0.23238610336716467, "learning_rate": 1.9822008431596083e-05, "loss": 0.3318, "loss_nan_ranks": 0, "loss_rank_avg": 0.0778215304017067, "step": 435, "valid_targets_mean": 5079.6, "valid_targets_min": 1018 }, { "epoch": 2.8064, "grad_norm": 0.22745238988383099, "learning_rate": 1.937712202342881e-05, "loss": 0.3324, "loss_nan_ranks": 0, "loss_rank_avg": 0.08198985457420349, "step": 440, "valid_targets_mean": 5034.9, "valid_targets_min": 2190 }, { "epoch": 2.8384, "grad_norm": 0.22031663400522064, "learning_rate": 1.8932543944998037e-05, "loss": 0.3362, "loss_nan_ranks": 0, "loss_rank_avg": 0.09263623505830765, "step": 445, "valid_targets_mean": 5411.8, "valid_targets_min": 4103 }, { "epoch": 2.8704, "grad_norm": 0.23446135635447007, "learning_rate": 1.8488494266119877e-05, "loss": 0.3245, "loss_nan_ranks": 0, "loss_rank_avg": 0.08320371061563492, "step": 450, "valid_targets_mean": 5275.1, "valid_targets_min": 3131 }, { "epoch": 2.9024, "grad_norm": 0.2234904380138594, "learning_rate": 1.804519279504834e-05, "loss": 0.3251, "loss_nan_ranks": 0, "loss_rank_avg": 0.07954467833042145, "step": 455, "valid_targets_mean": 4903.2, "valid_targets_min": 1309 }, { "epoch": 2.9344, "grad_norm": 0.2274787643306054, "learning_rate": 1.7602858969668365e-05, "loss": 0.3294, "loss_nan_ranks": 0, "loss_rank_avg": 0.08588364720344543, "step": 460, "valid_targets_mean": 6065.0, "valid_targets_min": 4174 }, { "epoch": 2.9664, "grad_norm": 0.21541040235652797, "learning_rate": 1.716171174887231e-05, "loss": 0.3223, "loss_nan_ranks": 0, "loss_rank_avg": 0.08300478011369705, "step": 465, "valid_targets_mean": 5221.2, "valid_targets_min": 2466 }, { "epoch": 2.9984, "grad_norm": 0.20577577268402722, "learning_rate": 1.6721969504173484e-05, "loss": 0.328, "loss_nan_ranks": 0, "loss_rank_avg": 0.08499614894390106, "step": 470, "valid_targets_mean": 6225.9, "valid_targets_min": 3937 }, { "epoch": 3.0256, "grad_norm": 0.21751044527982824, "learning_rate": 1.628384991161041e-05, "loss": 0.3194, "loss_nan_ranks": 0, "loss_rank_avg": 0.08121082186698914, "step": 475, "valid_targets_mean": 6044.2, "valid_targets_min": 4658 }, { "epoch": 3.0576, "grad_norm": 0.21018229320526427, "learning_rate": 1.5847569843995452e-05, "loss": 0.3234, "loss_nan_ranks": 0, "loss_rank_avg": 0.07966670393943787, "step": 480, "valid_targets_mean": 5516.9, "valid_targets_min": 3439 }, { "epoch": 3.0896, "grad_norm": 0.21179087254975096, "learning_rate": 1.5413345263560922e-05, "loss": 0.3269, "loss_nan_ranks": 0, "loss_rank_avg": 0.08021973818540573, "step": 485, "valid_targets_mean": 5511.8, "valid_targets_min": 625 }, { "epoch": 3.1216, "grad_norm": 0.21533786371733335, "learning_rate": 1.4981391115056032e-05, "loss": 0.3151, "loss_nan_ranks": 0, "loss_rank_avg": 0.08071337640285492, "step": 490, "valid_targets_mean": 6069.5, "valid_targets_min": 4143 }, { "epoch": 3.1536, "grad_norm": 0.22340718433208287, "learning_rate": 1.455192121934748e-05, "loss": 0.3138, "loss_nan_ranks": 0, "loss_rank_avg": 0.07231708616018295, "step": 495, "valid_targets_mean": 5085.9, "valid_targets_min": 813 }, { "epoch": 3.1856, "grad_norm": 0.23470120256913535, "learning_rate": 1.4125148167576303e-05, "loss": 0.3316, "loss_nan_ranks": 0, "loss_rank_avg": 0.08944922685623169, "step": 500, "valid_targets_mean": 5925.7, "valid_targets_min": 4258 }, { "epoch": 3.2176, "grad_norm": 0.2225970343777138, "learning_rate": 1.3701283215923563e-05, "loss": 0.3198, "loss_nan_ranks": 0, "loss_rank_avg": 0.07638940215110779, "step": 505, "valid_targets_mean": 5141.3, "valid_targets_min": 1120 }, { "epoch": 3.2496, "grad_norm": 0.22812978410814985, "learning_rate": 1.328053618103677e-05, "loss": 0.3283, "loss_nan_ranks": 0, "loss_rank_avg": 0.08584798872470856, "step": 510, "valid_targets_mean": 5091.2, "valid_targets_min": 1999 }, { "epoch": 3.2816, "grad_norm": 0.2108055801479295, "learning_rate": 1.2863115336168916e-05, "loss": 0.3257, "loss_nan_ranks": 0, "loss_rank_avg": 0.07783376425504684, "step": 515, "valid_targets_mean": 5384.1, "valid_targets_min": 935 }, { "epoch": 3.3136, "grad_norm": 0.27065537006118323, "learning_rate": 1.2449227308081509e-05, "loss": 0.3207, "loss_nan_ranks": 0, "loss_rank_avg": 0.07168179750442505, "step": 520, "valid_targets_mean": 5510.8, "valid_targets_min": 686 }, { "epoch": 3.3456, "grad_norm": 0.22197248150979024, "learning_rate": 1.2039076974762587e-05, "loss": 0.3254, "loss_nan_ranks": 0, "loss_rank_avg": 0.08753179013729095, "step": 525, "valid_targets_mean": 5997.3, "valid_targets_min": 4451 }, { "epoch": 3.3776, "grad_norm": 0.21204994803562507, "learning_rate": 1.163286736401044e-05, "loss": 0.3295, "loss_nan_ranks": 0, "loss_rank_avg": 0.07973788678646088, "step": 530, "valid_targets_mean": 5424.2, "valid_targets_min": 2094 }, { "epoch": 3.4096, "grad_norm": 0.20565663141003207, "learning_rate": 1.123079955293322e-05, "loss": 0.3268, "loss_nan_ranks": 0, "loss_rank_avg": 0.07733616232872009, "step": 535, "valid_targets_mean": 5321.2, "valid_targets_min": 1101 }, { "epoch": 3.4416, "grad_norm": 0.21601342130688855, "learning_rate": 1.0833072568414037e-05, "loss": 0.3238, "loss_nan_ranks": 0, "loss_rank_avg": 0.08604863286018372, "step": 540, "valid_targets_mean": 5389.1, "valid_targets_min": 2707 }, { "epoch": 3.4736000000000002, "grad_norm": 0.2266250484856398, "learning_rate": 1.0439883288591057e-05, "loss": 0.3234, "loss_nan_ranks": 0, "loss_rank_avg": 0.08474144339561462, "step": 545, "valid_targets_mean": 5556.5, "valid_targets_min": 3476 }, { "epoch": 3.5056000000000003, "grad_norm": 0.21000231515568052, "learning_rate": 1.0051426345401202e-05, "loss": 0.3226, "loss_nan_ranks": 0, "loss_rank_avg": 0.07983237504959106, "step": 550, "valid_targets_mean": 5647.8, "valid_targets_min": 1239 }, { "epoch": 3.5376, "grad_norm": 0.21908337779577544, "learning_rate": 9.667894028235704e-06, "loss": 0.328, "loss_nan_ranks": 0, "loss_rank_avg": 0.07918648421764374, "step": 555, "valid_targets_mean": 5250.9, "valid_targets_min": 2989 }, { "epoch": 3.5696, "grad_norm": 0.20775201883443636, "learning_rate": 9.289476188755315e-06, "loss": 0.3207, "loss_nan_ranks": 0, "loss_rank_avg": 0.08295097947120667, "step": 560, "valid_targets_mean": 6207.6, "valid_targets_min": 4217 }, { "epoch": 3.6016, "grad_norm": 0.21294278678462036, "learning_rate": 8.916360146912122e-06, "loss": 0.3194, "loss_nan_ranks": 0, "loss_rank_avg": 0.07971015572547913, "step": 565, "valid_targets_mean": 5787.9, "valid_targets_min": 3808 }, { "epoch": 3.6336, "grad_norm": 0.22408542647313262, "learning_rate": 8.548730598224646e-06, "loss": 0.3242, "loss_nan_ranks": 0, "loss_rank_avg": 0.08619304746389389, "step": 570, "valid_targets_mean": 5396.3, "valid_targets_min": 3498 }, { "epoch": 3.6656, "grad_norm": 0.2289853339804503, "learning_rate": 8.186769522352053e-06, "loss": 0.3204, "loss_nan_ranks": 0, "loss_rank_avg": 0.08720386028289795, "step": 575, "valid_targets_mean": 5984.4, "valid_targets_min": 4048 }, { "epoch": 3.6976, "grad_norm": 0.21073530692466028, "learning_rate": 7.830656093012714e-06, "loss": 0.3286, "loss_nan_ranks": 0, "loss_rank_avg": 0.08480887115001678, "step": 580, "valid_targets_mean": 5643.3, "valid_targets_min": 2574 }, { "epoch": 3.7296, "grad_norm": 0.22372831454903153, "learning_rate": 7.480566589291696e-06, "loss": 0.3271, "loss_nan_ranks": 0, "loss_rank_avg": 0.0835227221250534, "step": 585, "valid_targets_mean": 5632.9, "valid_targets_min": 3630 }, { "epoch": 3.7616, "grad_norm": 0.19869385568005432, "learning_rate": 7.1366743083812285e-06, "loss": 0.3139, "loss_nan_ranks": 0, "loss_rank_avg": 0.07951802760362625, "step": 590, "valid_targets_mean": 5470.8, "valid_targets_min": 3152 }, { "epoch": 3.7936, "grad_norm": 0.20574173001103932, "learning_rate": 6.799149479797101e-06, "loss": 0.3233, "loss_nan_ranks": 0, "loss_rank_avg": 0.0819997638463974, "step": 595, "valid_targets_mean": 5568.1, "valid_targets_min": 4110 }, { "epoch": 3.8256, "grad_norm": 0.2152111168894627, "learning_rate": 6.4681591811137e-06, "loss": 0.3264, "loss_nan_ranks": 0, "loss_rank_avg": 0.09062743186950684, "step": 600, "valid_targets_mean": 5654.9, "valid_targets_min": 2936 }, { "epoch": 3.8576, "grad_norm": 0.2053653642091841, "learning_rate": 6.143867255259197e-06, "loss": 0.3286, "loss_nan_ranks": 0, "loss_rank_avg": 0.08145453035831451, "step": 605, "valid_targets_mean": 5277.7, "valid_targets_min": 958 }, { "epoch": 3.8895999999999997, "grad_norm": 0.19618687651688638, "learning_rate": 5.8264342294119504e-06, "loss": 0.3307, "loss_nan_ranks": 0, "loss_rank_avg": 0.07444492727518082, "step": 610, "valid_targets_mean": 5692.4, "valid_targets_min": 1054 }, { "epoch": 3.9215999999999998, "grad_norm": 0.4633944919440031, "learning_rate": 5.516017235538258e-06, "loss": 0.3218, "loss_nan_ranks": 0, "loss_rank_avg": 0.07183433324098587, "step": 615, "valid_targets_mean": 6619.4, "valid_targets_min": 4473 }, { "epoch": 3.9536, "grad_norm": 0.19886084345698227, "learning_rate": 5.212769932610695e-06, "loss": 0.3221, "loss_nan_ranks": 0, "loss_rank_avg": 0.0788281261920929, "step": 620, "valid_targets_mean": 5528.3, "valid_targets_min": 3187 }, { "epoch": 3.9856, "grad_norm": 0.208092162317957, "learning_rate": 4.916842430545681e-06, "loss": 0.3307, "loss_nan_ranks": 0, "loss_rank_avg": 0.08409751206636429, "step": 625, "valid_targets_mean": 6215.4, "valid_targets_min": 3757 }, { "epoch": 4.0128, "grad_norm": 0.21479215026797172, "learning_rate": 4.628381215897837e-06, "loss": 0.3245, "loss_nan_ranks": 0, "loss_rank_avg": 0.08276808261871338, "step": 630, "valid_targets_mean": 4926.9, "valid_targets_min": 2898 }, { "epoch": 4.0448, "grad_norm": 0.20550296016432856, "learning_rate": 4.347529079347914e-06, "loss": 0.3226, "loss_nan_ranks": 0, "loss_rank_avg": 0.07196902483701706, "step": 635, "valid_targets_mean": 5715.8, "valid_targets_min": 710 }, { "epoch": 4.0768, "grad_norm": 0.19841931672206817, "learning_rate": 4.074425045020247e-06, "loss": 0.3055, "loss_nan_ranks": 0, "loss_rank_avg": 0.07841131091117859, "step": 640, "valid_targets_mean": 5529.1, "valid_targets_min": 881 }, { "epoch": 4.1088, "grad_norm": 0.20632895716592087, "learning_rate": 3.8092043016646487e-06, "loss": 0.3238, "loss_nan_ranks": 0, "loss_rank_avg": 0.08663403987884521, "step": 645, "valid_targets_mean": 5540.6, "valid_targets_min": 646 }, { "epoch": 4.1408, "grad_norm": 0.19479668863200172, "learning_rate": 3.551998135736867e-06, "loss": 0.3247, "loss_nan_ranks": 0, "loss_rank_avg": 0.08184544742107391, "step": 650, "valid_targets_mean": 5659.1, "valid_targets_min": 3048 }, { "epoch": 4.1728, "grad_norm": 0.21722025161657196, "learning_rate": 3.3029338664107267e-06, "loss": 0.3205, "loss_nan_ranks": 0, "loss_rank_avg": 0.08144575357437134, "step": 655, "valid_targets_mean": 5178.1, "valid_targets_min": 571 }, { "epoch": 4.2048, "grad_norm": 0.20576725177526284, "learning_rate": 3.0621347825540625e-06, "loss": 0.3249, "loss_nan_ranks": 0, "loss_rank_avg": 0.07952291518449783, "step": 660, "valid_targets_mean": 5043.1, "valid_targets_min": 1090 }, { "epoch": 4.2368, "grad_norm": 0.2137647103914308, "learning_rate": 2.8297200816997183e-06, "loss": 0.3243, "loss_nan_ranks": 0, "loss_rank_avg": 0.08988995850086212, "step": 665, "valid_targets_mean": 6148.6, "valid_targets_min": 4401 }, { "epoch": 4.2688, "grad_norm": 0.20096638447003157, "learning_rate": 2.605804811041803e-06, "loss": 0.3129, "loss_nan_ranks": 0, "loss_rank_avg": 0.0822472795844078, "step": 670, "valid_targets_mean": 5288.3, "valid_targets_min": 3220 }, { "epoch": 4.3008, "grad_norm": 0.20183040579550365, "learning_rate": 2.390499810486351e-06, "loss": 0.3338, "loss_nan_ranks": 0, "loss_rank_avg": 0.08840819448232651, "step": 675, "valid_targets_mean": 5098.6, "valid_targets_min": 3391 }, { "epoch": 4.3328, "grad_norm": 0.19426925166522613, "learning_rate": 2.183911657784685e-06, "loss": 0.3189, "loss_nan_ranks": 0, "loss_rank_avg": 0.08456306159496307, "step": 680, "valid_targets_mean": 5405.1, "valid_targets_min": 3206 }, { "epoch": 4.3648, "grad_norm": 0.20116698442114167, "learning_rate": 1.986142615776532e-06, "loss": 0.3304, "loss_nan_ranks": 0, "loss_rank_avg": 0.07993205636739731, "step": 685, "valid_targets_mean": 5235.6, "valid_targets_min": 1167 }, { "epoch": 4.3968, "grad_norm": 0.20291008340159364, "learning_rate": 1.7972905817690644e-06, "loss": 0.3178, "loss_nan_ranks": 0, "loss_rank_avg": 0.08226853609085083, "step": 690, "valid_targets_mean": 5901.6, "valid_targets_min": 4097 }, { "epoch": 4.4288, "grad_norm": 0.1970309784297005, "learning_rate": 1.617449039076955e-06, "loss": 0.3205, "loss_nan_ranks": 0, "loss_rank_avg": 0.0889642983675003, "step": 695, "valid_targets_mean": 6463.2, "valid_targets_min": 3151 }, { "epoch": 4.4608, "grad_norm": 0.21957818241096985, "learning_rate": 1.4467070107473413e-06, "loss": 0.3102, "loss_nan_ranks": 0, "loss_rank_avg": 0.08201165497303009, "step": 700, "valid_targets_mean": 5256.1, "valid_targets_min": 1931 }, { "epoch": 4.4928, "grad_norm": 0.20381806326480958, "learning_rate": 1.2851490154926816e-06, "loss": 0.3217, "loss_nan_ranks": 0, "loss_rank_avg": 0.08283894509077072, "step": 705, "valid_targets_mean": 5474.8, "valid_targets_min": 3711 }, { "epoch": 4.5248, "grad_norm": 0.20007664300882147, "learning_rate": 1.1328550258533211e-06, "loss": 0.3195, "loss_nan_ranks": 0, "loss_rank_avg": 0.08363893628120422, "step": 710, "valid_targets_mean": 5336.9, "valid_targets_min": 4024 }, { "epoch": 4.5568, "grad_norm": 0.2027444945770559, "learning_rate": 9.899004286103953e-07, "loss": 0.3145, "loss_nan_ranks": 0, "loss_rank_avg": 0.08135296404361725, "step": 715, "valid_targets_mean": 5425.2, "valid_targets_min": 3646 }, { "epoch": 4.5888, "grad_norm": 0.18936965807900735, "learning_rate": 8.5635598746876e-07, "loss": 0.3156, "loss_nan_ranks": 0, "loss_rank_avg": 0.07710923254489899, "step": 720, "valid_targets_mean": 6018.6, "valid_targets_min": 4053 }, { "epoch": 4.6208, "grad_norm": 0.1847915315828253, "learning_rate": 7.32287808028389e-07, "loss": 0.3265, "loss_nan_ranks": 0, "loss_rank_avg": 0.07985624670982361, "step": 725, "valid_targets_mean": 6699.7, "valid_targets_min": 2723 }, { "epoch": 4.6528, "grad_norm": 0.2099701962926105, "learning_rate": 6.177573050615327e-07, "loss": 0.3154, "loss_nan_ranks": 0, "loss_rank_avg": 0.08430498838424683, "step": 730, "valid_targets_mean": 5119.5, "valid_targets_min": 3867 }, { "epoch": 4.6848, "grad_norm": 0.18915002670306677, "learning_rate": 5.128211721119213e-07, "loss": 0.3188, "loss_nan_ranks": 0, "loss_rank_avg": 0.0837610512971878, "step": 735, "valid_targets_mean": 6211.8, "valid_targets_min": 4771 }, { "epoch": 4.7168, "grad_norm": 0.21175210312067896, "learning_rate": 4.175313534309755e-07, "loss": 0.3217, "loss_nan_ranks": 0, "loss_rank_avg": 0.08258293569087982, "step": 740, "valid_targets_mean": 5606.8, "valid_targets_min": 4067 }, { "epoch": 4.7488, "grad_norm": 0.19171064939070626, "learning_rate": 3.319350182649861e-07, "loss": 0.3207, "loss_nan_ranks": 0, "loss_rank_avg": 0.08525727689266205, "step": 745, "valid_targets_mean": 6320.4, "valid_targets_min": 3489 }, { "epoch": 4.7808, "grad_norm": 0.20140414971721998, "learning_rate": 2.560745375059392e-07, "loss": 0.3179, "loss_nan_ranks": 0, "loss_rank_avg": 0.07746146619319916, "step": 750, "valid_targets_mean": 5373.7, "valid_targets_min": 3737 }, { "epoch": 4.8128, "grad_norm": 0.19725191792852176, "learning_rate": 1.8998746271758016e-07, "loss": 0.3231, "loss_nan_ranks": 0, "loss_rank_avg": 0.07618357986211777, "step": 755, "valid_targets_mean": 5033.5, "valid_targets_min": 1995 }, { "epoch": 4.8448, "grad_norm": 0.18764613291156892, "learning_rate": 1.337065075470778e-07, "loss": 0.3189, "loss_nan_ranks": 0, "loss_rank_avg": 0.0758911669254303, "step": 760, "valid_targets_mean": 5730.7, "valid_targets_min": 3598 }, { "epoch": 4.8768, "grad_norm": 0.18701696026878642, "learning_rate": 8.725953153150279e-08, "loss": 0.3149, "loss_nan_ranks": 0, "loss_rank_avg": 0.07627741992473602, "step": 765, "valid_targets_mean": 5283.1, "valid_targets_min": 389 }, { "epoch": 4.9088, "grad_norm": 0.20233391129510425, "learning_rate": 5.066952630711886e-08, "loss": 0.3263, "loss_nan_ranks": 0, "loss_rank_avg": 0.08208300173282623, "step": 770, "valid_targets_mean": 5043.8, "valid_targets_min": 645 }, { "epoch": 4.9408, "grad_norm": 0.19082641034730213, "learning_rate": 2.3954604228342283e-08, "loss": 0.3236, "loss_nan_ranks": 0, "loss_rank_avg": 0.07420461624860764, "step": 775, "valid_targets_mean": 5601.7, "valid_targets_min": 4300 }, { "epoch": 4.9728, "grad_norm": 0.19359894736289907, "learning_rate": 7.12798940197601e-09, "loss": 0.3208, "loss_nan_ranks": 0, "loss_rank_avg": 0.08184170722961426, "step": 780, "valid_targets_mean": 5441.5, "valid_targets_min": 1112 }, { "epoch": 5.0, "grad_norm": 0.4056907192862938, "learning_rate": 1.9801114115480802e-10, "loss": 0.3226, "loss_nan_ranks": 0, "loss_rank_avg": 0.3350988030433655, "step": 785, "valid_targets_mean": 5269.4, "valid_targets_min": 3557 }, { "epoch": 5.0, "loss_nan_ranks": 0, "loss_rank_avg": 0.3350988030433655, "step": 785, "total_flos": 2.1121566765544899e+18, "train_loss": 0.35298769489215437, "train_runtime": 22279.9871, "train_samples_per_second": 2.242, "train_steps_per_second": 0.035, "valid_targets_mean": 5269.4, "valid_targets_min": 3557 } ], "logging_steps": 5, "max_steps": 785, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.1121566765544899e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }