{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 785, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.032, "grad_norm": 5.453780786232042, "learning_rate": 2.0253164556962026e-06, "loss": 0.8032, "loss_nan_ranks": 0, "loss_rank_avg": 0.20371782779693604, "step": 5, "valid_targets_mean": 6761.0, "valid_targets_min": 1767 }, { "epoch": 0.064, "grad_norm": 3.792389792983448, "learning_rate": 4.556962025316456e-06, "loss": 0.7643, "loss_nan_ranks": 0, "loss_rank_avg": 0.1712636798620224, "step": 10, "valid_targets_mean": 4999.5, "valid_targets_min": 1304 }, { "epoch": 0.096, "grad_norm": 2.063032028483624, "learning_rate": 7.08860759493671e-06, "loss": 0.7318, "loss_nan_ranks": 0, "loss_rank_avg": 0.17536824941635132, "step": 15, "valid_targets_mean": 5833.2, "valid_targets_min": 1211 }, { "epoch": 0.128, "grad_norm": 0.8871678160368911, "learning_rate": 9.620253164556963e-06, "loss": 0.6933, "loss_nan_ranks": 0, "loss_rank_avg": 0.18351516127586365, "step": 20, "valid_targets_mean": 6014.7, "valid_targets_min": 3717 }, { "epoch": 0.16, "grad_norm": 0.7382530806327504, "learning_rate": 1.2151898734177216e-05, "loss": 0.6776, "loss_nan_ranks": 0, "loss_rank_avg": 0.1541610211133957, "step": 25, "valid_targets_mean": 5068.2, "valid_targets_min": 1347 }, { "epoch": 0.192, "grad_norm": 0.514129978379908, "learning_rate": 1.468354430379747e-05, "loss": 0.6547, "loss_nan_ranks": 0, "loss_rank_avg": 0.15168946981430054, "step": 30, "valid_targets_mean": 5006.1, "valid_targets_min": 881 }, { "epoch": 0.224, "grad_norm": 0.4336197541753502, "learning_rate": 1.7215189873417723e-05, "loss": 0.6208, "loss_nan_ranks": 0, "loss_rank_avg": 0.14679864048957825, "step": 35, "valid_targets_mean": 5579.1, "valid_targets_min": 1357 }, { "epoch": 0.256, "grad_norm": 0.36433161326142466, "learning_rate": 1.974683544303798e-05, "loss": 0.6069, "loss_nan_ranks": 0, "loss_rank_avg": 0.15517079830169678, "step": 40, "valid_targets_mean": 5445.0, "valid_targets_min": 1474 }, { "epoch": 0.288, "grad_norm": 0.32965171941859056, "learning_rate": 2.2278481012658228e-05, "loss": 0.5728, "loss_nan_ranks": 0, "loss_rank_avg": 0.14498282968997955, "step": 45, "valid_targets_mean": 6170.8, "valid_targets_min": 3922 }, { "epoch": 0.32, "grad_norm": 0.2872624196401524, "learning_rate": 2.481012658227848e-05, "loss": 0.5765, "loss_nan_ranks": 0, "loss_rank_avg": 0.1473766267299652, "step": 50, "valid_targets_mean": 6663.2, "valid_targets_min": 4765 }, { "epoch": 0.352, "grad_norm": 0.27100975287589185, "learning_rate": 2.7341772151898737e-05, "loss": 0.5616, "loss_nan_ranks": 0, "loss_rank_avg": 0.14131703972816467, "step": 55, "valid_targets_mean": 5866.8, "valid_targets_min": 1425 }, { "epoch": 0.384, "grad_norm": 0.2637945073634897, "learning_rate": 2.987341772151899e-05, "loss": 0.5483, "loss_nan_ranks": 0, "loss_rank_avg": 0.13468310236930847, "step": 60, "valid_targets_mean": 5623.7, "valid_targets_min": 1606 }, { "epoch": 0.416, "grad_norm": 0.2609484255153037, "learning_rate": 3.240506329113924e-05, "loss": 0.5464, "loss_nan_ranks": 0, "loss_rank_avg": 0.10717941075563431, "step": 65, "valid_targets_mean": 5325.4, "valid_targets_min": 1780 }, { "epoch": 0.448, "grad_norm": 0.2475619693697882, "learning_rate": 3.49367088607595e-05, "loss": 0.5473, "loss_nan_ranks": 0, "loss_rank_avg": 0.14007271826267242, "step": 70, "valid_targets_mean": 5930.2, "valid_targets_min": 1506 }, { "epoch": 0.48, "grad_norm": 0.24464093539865395, "learning_rate": 3.746835443037975e-05, "loss": 0.5233, "loss_nan_ranks": 0, "loss_rank_avg": 0.12224975228309631, "step": 75, "valid_targets_mean": 5512.3, "valid_targets_min": 2940 }, { "epoch": 0.512, "grad_norm": 0.22999667928124048, "learning_rate": 4e-05, "loss": 0.5253, "loss_nan_ranks": 0, "loss_rank_avg": 0.12176463007926941, "step": 80, "valid_targets_mean": 5304.3, "valid_targets_min": 3470 }, { "epoch": 0.544, "grad_norm": 0.23934636533674483, "learning_rate": 3.999504991751045e-05, "loss": 0.5338, "loss_nan_ranks": 0, "loss_rank_avg": 0.13291212916374207, "step": 85, "valid_targets_mean": 5400.6, "valid_targets_min": 1624 }, { "epoch": 0.576, "grad_norm": 0.22974570019649057, "learning_rate": 3.9980202120373464e-05, "loss": 0.5139, "loss_nan_ranks": 0, "loss_rank_avg": 0.12326923757791519, "step": 90, "valid_targets_mean": 4998.4, "valid_targets_min": 1247 }, { "epoch": 0.608, "grad_norm": 0.2926220288463877, "learning_rate": 3.995546395837111e-05, "loss": 0.5038, "loss_nan_ranks": 0, "loss_rank_avg": 0.1356104016304016, "step": 95, "valid_targets_mean": 5303.9, "valid_targets_min": 1326 }, { "epoch": 0.64, "grad_norm": 0.23427791767385714, "learning_rate": 3.992084767709763e-05, "loss": 0.5066, "loss_nan_ranks": 0, "loss_rank_avg": 0.1405172497034073, "step": 100, "valid_targets_mean": 5991.0, "valid_targets_min": 3481 }, { "epoch": 0.672, "grad_norm": 0.28437672036074757, "learning_rate": 3.987637041189781e-05, "loss": 0.512, "loss_nan_ranks": 0, "loss_rank_avg": 0.1347738653421402, "step": 105, "valid_targets_mean": 6278.3, "valid_targets_min": 2344 }, { "epoch": 0.704, "grad_norm": 0.24924033321191602, "learning_rate": 3.982205417938482e-05, "loss": 0.4975, "loss_nan_ranks": 0, "loss_rank_avg": 0.11735053360462189, "step": 110, "valid_targets_mean": 5851.7, "valid_targets_min": 1129 }, { "epoch": 0.736, "grad_norm": 0.2507030670655199, "learning_rate": 3.975792586654179e-05, "loss": 0.5084, "loss_nan_ranks": 0, "loss_rank_avg": 0.12411265820264816, "step": 115, "valid_targets_mean": 5674.4, "valid_targets_min": 3051 }, { "epoch": 0.768, "grad_norm": 0.2500377635060955, "learning_rate": 3.968401721741259e-05, "loss": 0.5022, "loss_nan_ranks": 0, "loss_rank_avg": 0.12196642905473709, "step": 120, "valid_targets_mean": 6003.1, "valid_targets_min": 1515 }, { "epoch": 0.8, "grad_norm": 0.24135329786073206, "learning_rate": 3.960036481738819e-05, "loss": 0.5008, "loss_nan_ranks": 0, "loss_rank_avg": 0.12246815860271454, "step": 125, "valid_targets_mean": 5971.2, "valid_targets_min": 2866 }, { "epoch": 0.832, "grad_norm": 0.22519547761877387, "learning_rate": 3.950701007509667e-05, "loss": 0.4981, "loss_nan_ranks": 0, "loss_rank_avg": 0.09827093780040741, "step": 130, "valid_targets_mean": 5276.2, "valid_targets_min": 1312 }, { "epoch": 0.864, "grad_norm": 0.24777405687491888, "learning_rate": 3.940399920190552e-05, "loss": 0.4924, "loss_nan_ranks": 0, "loss_rank_avg": 0.1192011833190918, "step": 135, "valid_targets_mean": 5829.4, "valid_targets_min": 1973 }, { "epoch": 0.896, "grad_norm": 0.2461001388229048, "learning_rate": 3.92913831890467e-05, "loss": 0.5026, "loss_nan_ranks": 0, "loss_rank_avg": 0.127201110124588, "step": 140, "valid_targets_mean": 6177.5, "valid_targets_min": 3491 }, { "epoch": 0.928, "grad_norm": 0.26205526754490166, "learning_rate": 3.916921778237556e-05, "loss": 0.4921, "loss_nan_ranks": 0, "loss_rank_avg": 0.12378434836864471, "step": 145, "valid_targets_mean": 6577.8, "valid_targets_min": 2300 }, { "epoch": 0.96, "grad_norm": 0.2527022155127535, "learning_rate": 3.903756345477612e-05, "loss": 0.4917, "loss_nan_ranks": 0, "loss_rank_avg": 0.11821117997169495, "step": 150, "valid_targets_mean": 5569.9, "valid_targets_min": 1507 }, { "epoch": 0.992, "grad_norm": 0.24869143664799934, "learning_rate": 3.889648537622657e-05, "loss": 0.482, "loss_nan_ranks": 0, "loss_rank_avg": 0.11581720411777496, "step": 155, "valid_targets_mean": 5312.9, "valid_targets_min": 2593 }, { "epoch": 1.0192, "grad_norm": 0.23417657797639116, "learning_rate": 3.874605338153952e-05, "loss": 0.4875, "loss_nan_ranks": 0, "loss_rank_avg": 0.1262468695640564, "step": 160, "valid_targets_mean": 6192.8, "valid_targets_min": 4127 }, { "epoch": 1.0512, "grad_norm": 0.27974191809541077, "learning_rate": 3.8586341935793265e-05, "loss": 0.4818, "loss_nan_ranks": 0, "loss_rank_avg": 0.12143966555595398, "step": 165, "valid_targets_mean": 5954.1, "valid_targets_min": 1743 }, { "epoch": 1.0832, "grad_norm": 0.25435642517002544, "learning_rate": 3.841743009747089e-05, "loss": 0.4815, "loss_nan_ranks": 0, "loss_rank_avg": 0.13246093690395355, "step": 170, "valid_targets_mean": 6993.4, "valid_targets_min": 3801 }, { "epoch": 1.1152, "grad_norm": 0.25881160063054937, "learning_rate": 3.8239401479325714e-05, "loss": 0.4693, "loss_nan_ranks": 0, "loss_rank_avg": 0.11434349417686462, "step": 175, "valid_targets_mean": 4978.4, "valid_targets_min": 1260 }, { "epoch": 1.1472, "grad_norm": 0.2278502886084013, "learning_rate": 3.8052344206992276e-05, "loss": 0.4787, "loss_nan_ranks": 0, "loss_rank_avg": 0.12233040481805801, "step": 180, "valid_targets_mean": 7108.7, "valid_targets_min": 2155 }, { "epoch": 1.1792, "grad_norm": 0.25565502704610965, "learning_rate": 3.7856350875363396e-05, "loss": 0.4801, "loss_nan_ranks": 0, "loss_rank_avg": 0.11371868848800659, "step": 185, "valid_targets_mean": 6057.6, "valid_targets_min": 3039 }, { "epoch": 1.2112, "grad_norm": 0.24238202719508706, "learning_rate": 3.765151850275497e-05, "loss": 0.475, "loss_nan_ranks": 0, "loss_rank_avg": 0.10759443044662476, "step": 190, "valid_targets_mean": 5236.0, "valid_targets_min": 1548 }, { "epoch": 1.2432, "grad_norm": 0.2707586320724312, "learning_rate": 3.7437948482881104e-05, "loss": 0.4838, "loss_nan_ranks": 0, "loss_rank_avg": 0.10973644256591797, "step": 195, "valid_targets_mean": 4837.2, "valid_targets_min": 1166 }, { "epoch": 1.2752, "grad_norm": 0.26143437324759183, "learning_rate": 3.721574653466336e-05, "loss": 0.4737, "loss_nan_ranks": 0, "loss_rank_avg": 0.13594186305999756, "step": 200, "valid_targets_mean": 5692.9, "valid_targets_min": 4103 }, { "epoch": 1.3072, "grad_norm": 0.26530637558666487, "learning_rate": 3.698502264989903e-05, "loss": 0.474, "loss_nan_ranks": 0, "loss_rank_avg": 0.0969756618142128, "step": 205, "valid_targets_mean": 4845.5, "valid_targets_min": 2870 }, { "epoch": 1.3392, "grad_norm": 0.24008064698409048, "learning_rate": 3.674589103881432e-05, "loss": 0.4667, "loss_nan_ranks": 0, "loss_rank_avg": 0.12318138033151627, "step": 210, "valid_targets_mean": 5510.6, "valid_targets_min": 1680 }, { "epoch": 1.3712, "grad_norm": 0.25529145100152656, "learning_rate": 3.64984700735293e-05, "loss": 0.4761, "loss_nan_ranks": 0, "loss_rank_avg": 0.12145581841468811, "step": 215, "valid_targets_mean": 5900.8, "valid_targets_min": 2659 }, { "epoch": 1.4032, "grad_norm": 0.25794769045722116, "learning_rate": 3.624288222946273e-05, "loss": 0.484, "loss_nan_ranks": 0, "loss_rank_avg": 0.1101190522313118, "step": 220, "valid_targets_mean": 5558.0, "valid_targets_min": 1262 }, { "epoch": 1.4352, "grad_norm": 0.2782121058872405, "learning_rate": 3.597925402470578e-05, "loss": 0.4809, "loss_nan_ranks": 0, "loss_rank_avg": 0.13891330361366272, "step": 225, "valid_targets_mean": 6163.1, "valid_targets_min": 2175 }, { "epoch": 1.4672, "grad_norm": 0.2511379752274597, "learning_rate": 3.570771595739445e-05, "loss": 0.4669, "loss_nan_ranks": 0, "loss_rank_avg": 0.1152762770652771, "step": 230, "valid_targets_mean": 6515.6, "valid_targets_min": 1578 }, { "epoch": 1.4992, "grad_norm": 0.27590481971133046, "learning_rate": 3.5428402441111964e-05, "loss": 0.4806, "loss_nan_ranks": 0, "loss_rank_avg": 0.12730714678764343, "step": 235, "valid_targets_mean": 5110.8, "valid_targets_min": 1657 }, { "epoch": 1.5312000000000001, "grad_norm": 0.254323226627503, "learning_rate": 3.5141451738352936e-05, "loss": 0.4659, "loss_nan_ranks": 0, "loss_rank_avg": 0.10027951002120972, "step": 240, "valid_targets_mean": 5534.1, "valid_targets_min": 891 }, { "epoch": 1.5632000000000001, "grad_norm": 0.2580174702832105, "learning_rate": 3.4847005892082266e-05, "loss": 0.4675, "loss_nan_ranks": 0, "loss_rank_avg": 0.11096903681755066, "step": 245, "valid_targets_mean": 5662.7, "valid_targets_min": 1644 }, { "epoch": 1.5952, "grad_norm": 0.30698403507646943, "learning_rate": 3.454521065542273e-05, "loss": 0.4859, "loss_nan_ranks": 0, "loss_rank_avg": 0.1288014054298401, "step": 250, "valid_targets_mean": 5168.7, "valid_targets_min": 1162 }, { "epoch": 1.6272, "grad_norm": 0.25805156829059184, "learning_rate": 3.423621541950597e-05, "loss": 0.464, "loss_nan_ranks": 0, "loss_rank_avg": 0.129526287317276, "step": 255, "valid_targets_mean": 7147.4, "valid_targets_min": 3884 }, { "epoch": 1.6592, "grad_norm": 0.23616819648599033, "learning_rate": 3.3920173139522664e-05, "loss": 0.4673, "loss_nan_ranks": 0, "loss_rank_avg": 0.11364764720201492, "step": 260, "valid_targets_mean": 5991.1, "valid_targets_min": 1280 }, { "epoch": 1.6912, "grad_norm": 0.2810806381450421, "learning_rate": 3.35972402590084e-05, "loss": 0.471, "loss_nan_ranks": 0, "loss_rank_avg": 0.1247754693031311, "step": 265, "valid_targets_mean": 5614.1, "valid_targets_min": 1199 }, { "epoch": 1.7231999999999998, "grad_norm": 0.2529117867416003, "learning_rate": 3.326757663240291e-05, "loss": 0.4793, "loss_nan_ranks": 0, "loss_rank_avg": 0.1115766316652298, "step": 270, "valid_targets_mean": 5206.1, "valid_targets_min": 1946 }, { "epoch": 1.7551999999999999, "grad_norm": 0.24073937834464354, "learning_rate": 3.293134544592073e-05, "loss": 0.463, "loss_nan_ranks": 0, "loss_rank_avg": 0.12958145141601562, "step": 275, "valid_targets_mean": 6616.4, "valid_targets_min": 3579 }, { "epoch": 1.7872, "grad_norm": 0.23555835370029385, "learning_rate": 3.258871313677274e-05, "loss": 0.4677, "loss_nan_ranks": 0, "loss_rank_avg": 0.13404905796051025, "step": 280, "valid_targets_mean": 7205.4, "valid_targets_min": 1356 }, { "epoch": 1.8192, "grad_norm": 0.2513641254982323, "learning_rate": 3.2239849310778316e-05, "loss": 0.4636, "loss_nan_ranks": 0, "loss_rank_avg": 0.1133008599281311, "step": 285, "valid_targets_mean": 5637.1, "valid_targets_min": 3779 }, { "epoch": 1.8512, "grad_norm": 0.22918433139423086, "learning_rate": 3.188492665840909e-05, "loss": 0.4649, "loss_nan_ranks": 0, "loss_rank_avg": 0.10168616473674774, "step": 290, "valid_targets_mean": 5500.5, "valid_targets_min": 1523 }, { "epoch": 1.8832, "grad_norm": 0.25315031376778907, "learning_rate": 3.1524120869305726e-05, "loss": 0.4538, "loss_nan_ranks": 0, "loss_rank_avg": 0.11145422607660294, "step": 295, "valid_targets_mean": 4931.9, "valid_targets_min": 1168 }, { "epoch": 1.9152, "grad_norm": 0.22105397485658976, "learning_rate": 3.11576105453101e-05, "loss": 0.4589, "loss_nan_ranks": 0, "loss_rank_avg": 0.11858774721622467, "step": 300, "valid_targets_mean": 6575.4, "valid_targets_min": 4043 }, { "epoch": 1.9472, "grad_norm": 0.2668714334524866, "learning_rate": 3.0785577112055916e-05, "loss": 0.467, "loss_nan_ranks": 0, "loss_rank_avg": 0.1110314130783081, "step": 305, "valid_targets_mean": 4943.4, "valid_targets_min": 3259 }, { "epoch": 1.9792, "grad_norm": 0.27142228891732906, "learning_rate": 3.040820472916153e-05, "loss": 0.474, "loss_nan_ranks": 0, "loss_rank_avg": 0.10838167369365692, "step": 310, "valid_targets_mean": 5198.4, "valid_targets_min": 1470 }, { "epoch": 2.0064, "grad_norm": 0.2567890914244406, "learning_rate": 3.002568019906939e-05, "loss": 0.463, "loss_nan_ranks": 0, "loss_rank_avg": 0.1107785701751709, "step": 315, "valid_targets_mean": 5847.9, "valid_targets_min": 2018 }, { "epoch": 2.0384, "grad_norm": 0.2523608373300584, "learning_rate": 2.963819287457733e-05, "loss": 0.4488, "loss_nan_ranks": 0, "loss_rank_avg": 0.10997898876667023, "step": 320, "valid_targets_mean": 5916.2, "valid_targets_min": 2413 }, { "epoch": 2.0704, "grad_norm": 0.23463415840591811, "learning_rate": 2.924593456510733e-05, "loss": 0.4674, "loss_nan_ranks": 0, "loss_rank_avg": 0.10450360178947449, "step": 325, "valid_targets_mean": 5486.4, "valid_targets_min": 1264 }, { "epoch": 2.1024, "grad_norm": 0.24843951325249042, "learning_rate": 2.8849099441758306e-05, "loss": 0.4558, "loss_nan_ranks": 0, "loss_rank_avg": 0.11403744667768478, "step": 330, "valid_targets_mean": 5439.9, "valid_targets_min": 1138 }, { "epoch": 2.1344, "grad_norm": 0.24610362358361257, "learning_rate": 2.844788394118979e-05, "loss": 0.453, "loss_nan_ranks": 0, "loss_rank_avg": 0.11807750910520554, "step": 335, "valid_targets_mean": 5861.1, "valid_targets_min": 1565 }, { "epoch": 2.1664, "grad_norm": 0.33040794254123873, "learning_rate": 2.8042486668384164e-05, "loss": 0.4545, "loss_nan_ranks": 0, "loss_rank_avg": 0.11304843425750732, "step": 340, "valid_targets_mean": 6610.1, "valid_targets_min": 2124 }, { "epoch": 2.1984, "grad_norm": 0.25162468452460407, "learning_rate": 2.7633108298335582e-05, "loss": 0.4554, "loss_nan_ranks": 0, "loss_rank_avg": 0.11378766596317291, "step": 345, "valid_targets_mean": 5402.4, "valid_targets_min": 1247 }, { "epoch": 2.2304, "grad_norm": 0.25754716013199486, "learning_rate": 2.721995147671416e-05, "loss": 0.4466, "loss_nan_ranks": 0, "loss_rank_avg": 0.10926654934883118, "step": 350, "valid_targets_mean": 6230.7, "valid_targets_min": 1159 }, { "epoch": 2.2624, "grad_norm": 0.25363357284595456, "learning_rate": 2.68032207195547e-05, "loss": 0.4655, "loss_nan_ranks": 0, "loss_rank_avg": 0.12160837650299072, "step": 355, "valid_targets_mean": 5901.7, "valid_targets_min": 1400 }, { "epoch": 2.2944, "grad_norm": 0.2371953068408656, "learning_rate": 2.6383122312019604e-05, "loss": 0.4593, "loss_nan_ranks": 0, "loss_rank_avg": 0.10440811514854431, "step": 360, "valid_targets_mean": 5618.4, "valid_targets_min": 2764 }, { "epoch": 2.3264, "grad_norm": 0.24670634376756886, "learning_rate": 2.595986420628597e-05, "loss": 0.4554, "loss_nan_ranks": 0, "loss_rank_avg": 0.11458228528499603, "step": 365, "valid_targets_mean": 5704.5, "valid_targets_min": 1938 }, { "epoch": 2.3584, "grad_norm": 0.23895194020127508, "learning_rate": 2.5533655918607573e-05, "loss": 0.4585, "loss_nan_ranks": 0, "loss_rank_avg": 0.10346005856990814, "step": 370, "valid_targets_mean": 5528.0, "valid_targets_min": 1275 }, { "epoch": 2.3904, "grad_norm": 0.2408178690827922, "learning_rate": 2.510470842560259e-05, "loss": 0.4519, "loss_nan_ranks": 0, "loss_rank_avg": 0.11147673428058624, "step": 375, "valid_targets_mean": 5869.6, "valid_targets_min": 1637 }, { "epoch": 2.4224, "grad_norm": 0.23255787756846094, "learning_rate": 2.467323405981841e-05, "loss": 0.4607, "loss_nan_ranks": 0, "loss_rank_avg": 0.10699373483657837, "step": 380, "valid_targets_mean": 5419.7, "valid_targets_min": 2956 }, { "epoch": 2.4544, "grad_norm": 0.24230598627352132, "learning_rate": 2.423944640462533e-05, "loss": 0.4633, "loss_nan_ranks": 0, "loss_rank_avg": 0.10441893339157104, "step": 385, "valid_targets_mean": 4641.1, "valid_targets_min": 1544 }, { "epoch": 2.4864, "grad_norm": 0.24609359447675086, "learning_rate": 2.3803560188490968e-05, "loss": 0.4507, "loss_nan_ranks": 0, "loss_rank_avg": 0.13390594720840454, "step": 390, "valid_targets_mean": 6129.4, "valid_targets_min": 1506 }, { "epoch": 2.5183999999999997, "grad_norm": 0.2450309981216017, "learning_rate": 2.336579117868789e-05, "loss": 0.4505, "loss_nan_ranks": 0, "loss_rank_avg": 0.09820785373449326, "step": 395, "valid_targets_mean": 4850.6, "valid_targets_min": 1443 }, { "epoch": 2.5504, "grad_norm": 0.26353410163682456, "learning_rate": 2.292635607448711e-05, "loss": 0.4558, "loss_nan_ranks": 0, "loss_rank_avg": 0.12506261467933655, "step": 400, "valid_targets_mean": 6348.2, "valid_targets_min": 3938 }, { "epoch": 2.5824, "grad_norm": 0.2591867666837128, "learning_rate": 2.248547239989008e-05, "loss": 0.4644, "loss_nan_ranks": 0, "loss_rank_avg": 0.1142532080411911, "step": 405, "valid_targets_mean": 5473.3, "valid_targets_min": 3717 }, { "epoch": 2.6144, "grad_norm": 0.23939041830365473, "learning_rate": 2.204335839595255e-05, "loss": 0.4579, "loss_nan_ranks": 0, "loss_rank_avg": 0.126010924577713, "step": 410, "valid_targets_mean": 7091.4, "valid_targets_min": 2590 }, { "epoch": 2.6464, "grad_norm": 0.27523073180915464, "learning_rate": 2.1600232912753452e-05, "loss": 0.4524, "loss_nan_ranks": 0, "loss_rank_avg": 0.12722241878509521, "step": 415, "valid_targets_mean": 5683.2, "valid_targets_min": 1399 }, { "epoch": 2.6784, "grad_norm": 0.2555993394486925, "learning_rate": 2.1156315301062293e-05, "loss": 0.4419, "loss_nan_ranks": 0, "loss_rank_avg": 0.10284954309463501, "step": 420, "valid_targets_mean": 5005.5, "valid_targets_min": 934 }, { "epoch": 2.7104, "grad_norm": 0.26731870473795194, "learning_rate": 2.0711825303758712e-05, "loss": 0.4433, "loss_nan_ranks": 0, "loss_rank_avg": 0.11506682634353638, "step": 425, "valid_targets_mean": 6046.0, "valid_targets_min": 1647 }, { "epoch": 2.7424, "grad_norm": 0.2407561884924848, "learning_rate": 2.0266982947057962e-05, "loss": 0.4559, "loss_nan_ranks": 0, "loss_rank_avg": 0.10325722396373749, "step": 430, "valid_targets_mean": 5949.6, "valid_targets_min": 2688 }, { "epoch": 2.7744, "grad_norm": 0.2336404714487898, "learning_rate": 1.9822008431596083e-05, "loss": 0.4522, "loss_nan_ranks": 0, "loss_rank_avg": 0.10586457699537277, "step": 435, "valid_targets_mean": 6859.5, "valid_targets_min": 4222 }, { "epoch": 2.8064, "grad_norm": 0.22137813612500373, "learning_rate": 1.937712202342881e-05, "loss": 0.4408, "loss_nan_ranks": 0, "loss_rank_avg": 0.12141789495944977, "step": 440, "valid_targets_mean": 6645.0, "valid_targets_min": 3372 }, { "epoch": 2.8384, "grad_norm": 0.2361975809490191, "learning_rate": 1.8932543944998037e-05, "loss": 0.4621, "loss_nan_ranks": 0, "loss_rank_avg": 0.11469966918230057, "step": 445, "valid_targets_mean": 6750.8, "valid_targets_min": 3870 }, { "epoch": 2.8704, "grad_norm": 0.2569963385739716, "learning_rate": 1.8488494266119877e-05, "loss": 0.4558, "loss_nan_ranks": 0, "loss_rank_avg": 0.10432308912277222, "step": 450, "valid_targets_mean": 5330.1, "valid_targets_min": 1132 }, { "epoch": 2.9024, "grad_norm": 0.23032242260685118, "learning_rate": 1.804519279504834e-05, "loss": 0.4526, "loss_nan_ranks": 0, "loss_rank_avg": 0.11718428134918213, "step": 455, "valid_targets_mean": 6316.1, "valid_targets_min": 881 }, { "epoch": 2.9344, "grad_norm": 0.23444798682474463, "learning_rate": 1.7602858969668365e-05, "loss": 0.4619, "loss_nan_ranks": 0, "loss_rank_avg": 0.10966004431247711, "step": 460, "valid_targets_mean": 5196.4, "valid_targets_min": 1548 }, { "epoch": 2.9664, "grad_norm": 0.24127707453224004, "learning_rate": 1.716171174887231e-05, "loss": 0.4417, "loss_nan_ranks": 0, "loss_rank_avg": 0.10286953300237656, "step": 465, "valid_targets_mean": 5809.4, "valid_targets_min": 1703 }, { "epoch": 2.9984, "grad_norm": 0.24085134696567514, "learning_rate": 1.6721969504173484e-05, "loss": 0.443, "loss_nan_ranks": 0, "loss_rank_avg": 0.11106819659471512, "step": 470, "valid_targets_mean": 5545.3, "valid_targets_min": 1533 }, { "epoch": 3.0256, "grad_norm": 0.2541813174463172, "learning_rate": 1.628384991161041e-05, "loss": 0.4528, "loss_nan_ranks": 0, "loss_rank_avg": 0.11498390138149261, "step": 475, "valid_targets_mean": 5463.5, "valid_targets_min": 1925 }, { "epoch": 3.0576, "grad_norm": 0.2353622062322344, "learning_rate": 1.5847569843995452e-05, "loss": 0.4449, "loss_nan_ranks": 0, "loss_rank_avg": 0.1087237298488617, "step": 480, "valid_targets_mean": 5840.9, "valid_targets_min": 1507 }, { "epoch": 3.0896, "grad_norm": 0.22580596321042834, "learning_rate": 1.5413345263560922e-05, "loss": 0.4421, "loss_nan_ranks": 0, "loss_rank_avg": 0.11730214208364487, "step": 485, "valid_targets_mean": 7615.9, "valid_targets_min": 4895 }, { "epoch": 3.1216, "grad_norm": 0.2346986705939085, "learning_rate": 1.4981391115056032e-05, "loss": 0.4484, "loss_nan_ranks": 0, "loss_rank_avg": 0.08560355007648468, "step": 490, "valid_targets_mean": 5496.8, "valid_targets_min": 2426 }, { "epoch": 3.1536, "grad_norm": 0.2474046202773989, "learning_rate": 1.455192121934748e-05, "loss": 0.4392, "loss_nan_ranks": 0, "loss_rank_avg": 0.10706610977649689, "step": 495, "valid_targets_mean": 6306.4, "valid_targets_min": 1468 }, { "epoch": 3.1856, "grad_norm": 0.24598088386643885, "learning_rate": 1.4125148167576303e-05, "loss": 0.4517, "loss_nan_ranks": 0, "loss_rank_avg": 0.0977165475487709, "step": 500, "valid_targets_mean": 5140.1, "valid_targets_min": 2124 }, { "epoch": 3.2176, "grad_norm": 0.23772157152624504, "learning_rate": 1.3701283215923563e-05, "loss": 0.4515, "loss_nan_ranks": 0, "loss_rank_avg": 0.12374231219291687, "step": 505, "valid_targets_mean": 6650.8, "valid_targets_min": 2146 }, { "epoch": 3.2496, "grad_norm": 0.2381861982363929, "learning_rate": 1.328053618103677e-05, "loss": 0.4524, "loss_nan_ranks": 0, "loss_rank_avg": 0.08905310928821564, "step": 510, "valid_targets_mean": 4238.1, "valid_targets_min": 1246 }, { "epoch": 3.2816, "grad_norm": 0.24129917399571515, "learning_rate": 1.2863115336168916e-05, "loss": 0.4424, "loss_nan_ranks": 0, "loss_rank_avg": 0.10858754813671112, "step": 515, "valid_targets_mean": 5508.2, "valid_targets_min": 1990 }, { "epoch": 3.3136, "grad_norm": 0.2299768195746735, "learning_rate": 1.2449227308081509e-05, "loss": 0.4305, "loss_nan_ranks": 0, "loss_rank_avg": 0.10494785010814667, "step": 520, "valid_targets_mean": 5587.0, "valid_targets_min": 1414 }, { "epoch": 3.3456, "grad_norm": 0.22756287907781986, "learning_rate": 1.2039076974762587e-05, "loss": 0.4412, "loss_nan_ranks": 0, "loss_rank_avg": 0.09129714220762253, "step": 525, "valid_targets_mean": 5011.6, "valid_targets_min": 1275 }, { "epoch": 3.3776, "grad_norm": 0.2490236621582409, "learning_rate": 1.163286736401044e-05, "loss": 0.4353, "loss_nan_ranks": 0, "loss_rank_avg": 0.10486893355846405, "step": 530, "valid_targets_mean": 5423.1, "valid_targets_min": 1137 }, { "epoch": 3.4096, "grad_norm": 0.22017430759615417, "learning_rate": 1.123079955293322e-05, "loss": 0.4427, "loss_nan_ranks": 0, "loss_rank_avg": 0.1027197316288948, "step": 535, "valid_targets_mean": 5919.5, "valid_targets_min": 1816 }, { "epoch": 3.4416, "grad_norm": 0.2140066307199504, "learning_rate": 1.0833072568414037e-05, "loss": 0.4454, "loss_nan_ranks": 0, "loss_rank_avg": 0.11336848139762878, "step": 540, "valid_targets_mean": 7503.7, "valid_targets_min": 4812 }, { "epoch": 3.4736000000000002, "grad_norm": 0.23150362740195682, "learning_rate": 1.0439883288591057e-05, "loss": 0.4561, "loss_nan_ranks": 0, "loss_rank_avg": 0.11124958097934723, "step": 545, "valid_targets_mean": 6104.5, "valid_targets_min": 3593 }, { "epoch": 3.5056000000000003, "grad_norm": 0.2239460927463848, "learning_rate": 1.0051426345401202e-05, "loss": 0.4383, "loss_nan_ranks": 0, "loss_rank_avg": 0.08680635690689087, "step": 550, "valid_targets_mean": 4237.5, "valid_targets_min": 1464 }, { "epoch": 3.5376, "grad_norm": 0.2276675294214827, "learning_rate": 9.667894028235704e-06, "loss": 0.4512, "loss_nan_ranks": 0, "loss_rank_avg": 0.12256583571434021, "step": 555, "valid_targets_mean": 6583.3, "valid_targets_min": 3849 }, { "epoch": 3.5696, "grad_norm": 0.23874452325722306, "learning_rate": 9.289476188755315e-06, "loss": 0.4354, "loss_nan_ranks": 0, "loss_rank_avg": 0.10612036287784576, "step": 560, "valid_targets_mean": 6009.8, "valid_targets_min": 2688 }, { "epoch": 3.6016, "grad_norm": 0.21514329189036926, "learning_rate": 8.916360146912122e-06, "loss": 0.4469, "loss_nan_ranks": 0, "loss_rank_avg": 0.11689209938049316, "step": 565, "valid_targets_mean": 6188.1, "valid_targets_min": 3993 }, { "epoch": 3.6336, "grad_norm": 0.2362020094484568, "learning_rate": 8.548730598224646e-06, "loss": 0.4484, "loss_nan_ranks": 0, "loss_rank_avg": 0.11842970550060272, "step": 570, "valid_targets_mean": 5449.2, "valid_targets_min": 1275 }, { "epoch": 3.6656, "grad_norm": 0.2401716944941585, "learning_rate": 8.186769522352053e-06, "loss": 0.4512, "loss_nan_ranks": 0, "loss_rank_avg": 0.11459657549858093, "step": 575, "valid_targets_mean": 5901.6, "valid_targets_min": 3112 }, { "epoch": 3.6976, "grad_norm": 0.2260946660210682, "learning_rate": 7.830656093012714e-06, "loss": 0.45, "loss_nan_ranks": 0, "loss_rank_avg": 0.11391419172286987, "step": 580, "valid_targets_mean": 6314.2, "valid_targets_min": 2536 }, { "epoch": 3.7296, "grad_norm": 0.23490628110150955, "learning_rate": 7.480566589291696e-06, "loss": 0.4402, "loss_nan_ranks": 0, "loss_rank_avg": 0.1118554174900055, "step": 585, "valid_targets_mean": 6000.4, "valid_targets_min": 2420 }, { "epoch": 3.7616, "grad_norm": 0.22394540910322736, "learning_rate": 7.1366743083812285e-06, "loss": 0.4336, "loss_nan_ranks": 0, "loss_rank_avg": 0.11416637897491455, "step": 590, "valid_targets_mean": 5651.9, "valid_targets_min": 1312 }, { "epoch": 3.7936, "grad_norm": 0.23434924832059395, "learning_rate": 6.799149479797101e-06, "loss": 0.4524, "loss_nan_ranks": 0, "loss_rank_avg": 0.1163535937666893, "step": 595, "valid_targets_mean": 6361.0, "valid_targets_min": 3963 }, { "epoch": 3.8256, "grad_norm": 0.20434011194504567, "learning_rate": 6.4681591811137e-06, "loss": 0.431, "loss_nan_ranks": 0, "loss_rank_avg": 0.10242234170436859, "step": 600, "valid_targets_mean": 6733.9, "valid_targets_min": 2050 }, { "epoch": 3.8576, "grad_norm": 0.2205683173173195, "learning_rate": 6.143867255259197e-06, "loss": 0.4469, "loss_nan_ranks": 0, "loss_rank_avg": 0.10782508552074432, "step": 605, "valid_targets_mean": 5887.1, "valid_targets_min": 1264 }, { "epoch": 3.8895999999999997, "grad_norm": 0.22196701228922652, "learning_rate": 5.8264342294119504e-06, "loss": 0.4507, "loss_nan_ranks": 0, "loss_rank_avg": 0.09781044721603394, "step": 610, "valid_targets_mean": 6692.9, "valid_targets_min": 3335 }, { "epoch": 3.9215999999999998, "grad_norm": 0.23419677990052767, "learning_rate": 5.516017235538258e-06, "loss": 0.4398, "loss_nan_ranks": 0, "loss_rank_avg": 0.088894322514534, "step": 615, "valid_targets_mean": 5359.7, "valid_targets_min": 1567 }, { "epoch": 3.9536, "grad_norm": 0.20991276705753859, "learning_rate": 5.212769932610695e-06, "loss": 0.4465, "loss_nan_ranks": 0, "loss_rank_avg": 0.08706802129745483, "step": 620, "valid_targets_mean": 5569.8, "valid_targets_min": 1280 }, { "epoch": 3.9856, "grad_norm": 0.2305681012598391, "learning_rate": 4.916842430545681e-06, "loss": 0.4464, "loss_nan_ranks": 0, "loss_rank_avg": 0.10766085982322693, "step": 625, "valid_targets_mean": 6046.6, "valid_targets_min": 2956 }, { "epoch": 4.0128, "grad_norm": 0.23248030070161385, "learning_rate": 4.628381215897837e-06, "loss": 0.4368, "loss_nan_ranks": 0, "loss_rank_avg": 0.09869658201932907, "step": 630, "valid_targets_mean": 5463.2, "valid_targets_min": 1626 }, { "epoch": 4.0448, "grad_norm": 0.2320693470818531, "learning_rate": 4.347529079347914e-06, "loss": 0.4401, "loss_nan_ranks": 0, "loss_rank_avg": 0.12055571377277374, "step": 635, "valid_targets_mean": 5899.8, "valid_targets_min": 4103 }, { "epoch": 4.0768, "grad_norm": 0.2376324397286371, "learning_rate": 4.074425045020247e-06, "loss": 0.4506, "loss_nan_ranks": 0, "loss_rank_avg": 0.10627171397209167, "step": 640, "valid_targets_mean": 4855.4, "valid_targets_min": 1443 }, { "epoch": 4.1088, "grad_norm": 0.23365976592481544, "learning_rate": 3.8092043016646487e-06, "loss": 0.4392, "loss_nan_ranks": 0, "loss_rank_avg": 0.11408081650733948, "step": 645, "valid_targets_mean": 5649.8, "valid_targets_min": 1275 }, { "epoch": 4.1408, "grad_norm": 0.21456883184147094, "learning_rate": 3.551998135736867e-06, "loss": 0.4316, "loss_nan_ranks": 0, "loss_rank_avg": 0.10515138506889343, "step": 650, "valid_targets_mean": 5795.8, "valid_targets_min": 1994 }, { "epoch": 4.1728, "grad_norm": 0.23159726143523057, "learning_rate": 3.3029338664107267e-06, "loss": 0.4436, "loss_nan_ranks": 0, "loss_rank_avg": 0.12827849388122559, "step": 655, "valid_targets_mean": 6799.2, "valid_targets_min": 4330 }, { "epoch": 4.2048, "grad_norm": 0.2267271521046096, "learning_rate": 3.0621347825540625e-06, "loss": 0.4335, "loss_nan_ranks": 0, "loss_rank_avg": 0.11244725435972214, "step": 660, "valid_targets_mean": 5340.2, "valid_targets_min": 2584 }, { "epoch": 4.2368, "grad_norm": 0.20149080037204617, "learning_rate": 2.8297200816997183e-06, "loss": 0.4342, "loss_nan_ranks": 0, "loss_rank_avg": 0.09943097829818726, "step": 665, "valid_targets_mean": 5967.9, "valid_targets_min": 1349 }, { "epoch": 4.2688, "grad_norm": 0.22316435861473205, "learning_rate": 2.605804811041803e-06, "loss": 0.4475, "loss_nan_ranks": 0, "loss_rank_avg": 0.1142878532409668, "step": 670, "valid_targets_mean": 5788.6, "valid_targets_min": 1504 }, { "epoch": 4.3008, "grad_norm": 0.23749043061782324, "learning_rate": 2.390499810486351e-06, "loss": 0.4398, "loss_nan_ranks": 0, "loss_rank_avg": 0.09940489381551743, "step": 675, "valid_targets_mean": 5341.0, "valid_targets_min": 1211 }, { "epoch": 4.3328, "grad_norm": 0.21705720222967442, "learning_rate": 2.183911657784685e-06, "loss": 0.4384, "loss_nan_ranks": 0, "loss_rank_avg": 0.10698309540748596, "step": 680, "valid_targets_mean": 6206.0, "valid_targets_min": 1104 }, { "epoch": 4.3648, "grad_norm": 0.2359689779618734, "learning_rate": 1.986142615776532e-06, "loss": 0.4354, "loss_nan_ranks": 0, "loss_rank_avg": 0.12712863087654114, "step": 685, "valid_targets_mean": 6414.2, "valid_targets_min": 2090 }, { "epoch": 4.3968, "grad_norm": 0.2270181783968223, "learning_rate": 1.7972905817690644e-06, "loss": 0.4482, "loss_nan_ranks": 0, "loss_rank_avg": 0.11009643971920013, "step": 690, "valid_targets_mean": 5408.8, "valid_targets_min": 1740 }, { "epoch": 4.4288, "grad_norm": 0.1982686366700752, "learning_rate": 1.617449039076955e-06, "loss": 0.4467, "loss_nan_ranks": 0, "loss_rank_avg": 0.11948183923959732, "step": 695, "valid_targets_mean": 8110.3, "valid_targets_min": 1565 }, { "epoch": 4.4608, "grad_norm": 0.22182051002480957, "learning_rate": 1.4467070107473413e-06, "loss": 0.4407, "loss_nan_ranks": 0, "loss_rank_avg": 0.09709776937961578, "step": 700, "valid_targets_mean": 5159.9, "valid_targets_min": 3161 }, { "epoch": 4.4928, "grad_norm": 0.21510025780610673, "learning_rate": 1.2851490154926816e-06, "loss": 0.4475, "loss_nan_ranks": 0, "loss_rank_avg": 0.11582086980342865, "step": 705, "valid_targets_mean": 6414.6, "valid_targets_min": 1497 }, { "epoch": 4.5248, "grad_norm": 0.20748343105814038, "learning_rate": 1.1328550258533211e-06, "loss": 0.4444, "loss_nan_ranks": 0, "loss_rank_avg": 0.12172878533601761, "step": 710, "valid_targets_mean": 6439.6, "valid_targets_min": 3898 }, { "epoch": 4.5568, "grad_norm": 0.21381893999640084, "learning_rate": 9.899004286103953e-07, "loss": 0.4367, "loss_nan_ranks": 0, "loss_rank_avg": 0.10774854570627213, "step": 715, "valid_targets_mean": 6118.2, "valid_targets_min": 1434 }, { "epoch": 4.5888, "grad_norm": 0.22561568200277468, "learning_rate": 8.5635598746876e-07, "loss": 0.4344, "loss_nan_ranks": 0, "loss_rank_avg": 0.11041155457496643, "step": 720, "valid_targets_mean": 6051.2, "valid_targets_min": 3805 }, { "epoch": 4.6208, "grad_norm": 0.20402603591723495, "learning_rate": 7.32287808028389e-07, "loss": 0.4338, "loss_nan_ranks": 0, "loss_rank_avg": 0.11558706313371658, "step": 725, "valid_targets_mean": 6586.0, "valid_targets_min": 4236 }, { "epoch": 4.6528, "grad_norm": 0.24563597589147643, "learning_rate": 6.177573050615327e-07, "loss": 0.4304, "loss_nan_ranks": 0, "loss_rank_avg": 0.09163513034582138, "step": 730, "valid_targets_mean": 4575.3, "valid_targets_min": 1257 }, { "epoch": 4.6848, "grad_norm": 0.21645901836986417, "learning_rate": 5.128211721119213e-07, "loss": 0.4337, "loss_nan_ranks": 0, "loss_rank_avg": 0.11745560169219971, "step": 735, "valid_targets_mean": 6024.5, "valid_targets_min": 1402 }, { "epoch": 4.7168, "grad_norm": 0.21851740632245759, "learning_rate": 4.175313534309755e-07, "loss": 0.4366, "loss_nan_ranks": 0, "loss_rank_avg": 0.10883457958698273, "step": 740, "valid_targets_mean": 6283.2, "valid_targets_min": 1555 }, { "epoch": 4.7488, "grad_norm": 0.21932736290236435, "learning_rate": 3.319350182649861e-07, "loss": 0.4468, "loss_nan_ranks": 0, "loss_rank_avg": 0.10903538763523102, "step": 745, "valid_targets_mean": 6333.0, "valid_targets_min": 1504 }, { "epoch": 4.7808, "grad_norm": 0.22616118624010112, "learning_rate": 2.560745375059392e-07, "loss": 0.4382, "loss_nan_ranks": 0, "loss_rank_avg": 0.11657419800758362, "step": 750, "valid_targets_mean": 7166.1, "valid_targets_min": 3048 }, { "epoch": 4.8128, "grad_norm": 0.22643827545429354, "learning_rate": 1.8998746271758016e-07, "loss": 0.4369, "loss_nan_ranks": 0, "loss_rank_avg": 0.10837658494710922, "step": 755, "valid_targets_mean": 5800.5, "valid_targets_min": 3765 }, { "epoch": 4.8448, "grad_norm": 0.21056524475735883, "learning_rate": 1.337065075470778e-07, "loss": 0.4389, "loss_nan_ranks": 0, "loss_rank_avg": 0.10905088484287262, "step": 760, "valid_targets_mean": 6031.9, "valid_targets_min": 3851 }, { "epoch": 4.8768, "grad_norm": 0.22673914171528292, "learning_rate": 8.725953153150279e-08, "loss": 0.4299, "loss_nan_ranks": 0, "loss_rank_avg": 0.11067554354667664, "step": 765, "valid_targets_mean": 5955.0, "valid_targets_min": 3870 }, { "epoch": 4.9088, "grad_norm": 0.2234901980490257, "learning_rate": 5.066952630711886e-08, "loss": 0.459, "loss_nan_ranks": 0, "loss_rank_avg": 0.13258133828639984, "step": 770, "valid_targets_mean": 5992.2, "valid_targets_min": 1472 }, { "epoch": 4.9408, "grad_norm": 0.22776223805560375, "learning_rate": 2.3954604228342283e-08, "loss": 0.4454, "loss_nan_ranks": 0, "loss_rank_avg": 0.10461519658565521, "step": 775, "valid_targets_mean": 5661.4, "valid_targets_min": 1507 }, { "epoch": 4.9728, "grad_norm": 0.2189412079803582, "learning_rate": 7.12798940197601e-09, "loss": 0.439, "loss_nan_ranks": 0, "loss_rank_avg": 0.11273640394210815, "step": 780, "valid_targets_mean": 5641.1, "valid_targets_min": 2808 }, { "epoch": 5.0, "grad_norm": 0.381336408442084, "learning_rate": 1.9801114115480802e-10, "loss": 0.4311, "loss_nan_ranks": 0, "loss_rank_avg": 0.3657087981700897, "step": 785, "valid_targets_mean": 6505.4, "valid_targets_min": 1469 }, { "epoch": 5.0, "loss_nan_ranks": 0, "loss_rank_avg": 0.3657087981700897, "step": 785, "total_flos": 2.0706726654792172e+18, "train_loss": 0.47480044941993277, "train_runtime": 19659.7391, "train_samples_per_second": 2.543, "train_steps_per_second": 0.04, "valid_targets_mean": 6505.4, "valid_targets_min": 1469 } ], "logging_steps": 5, "max_steps": 785, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.0706726654792172e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }