| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 785, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.032, | |
| "grad_norm": 5.453780786232042, | |
| "learning_rate": 2.0253164556962026e-06, | |
| "loss": 0.8032, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20371782779693604, | |
| "step": 5, | |
| "valid_targets_mean": 6761.0, | |
| "valid_targets_min": 1767 | |
| }, | |
| { | |
| "epoch": 0.064, | |
| "grad_norm": 3.792389792983448, | |
| "learning_rate": 4.556962025316456e-06, | |
| "loss": 0.7643, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1712636798620224, | |
| "step": 10, | |
| "valid_targets_mean": 4999.5, | |
| "valid_targets_min": 1304 | |
| }, | |
| { | |
| "epoch": 0.096, | |
| "grad_norm": 2.063032028483624, | |
| "learning_rate": 7.08860759493671e-06, | |
| "loss": 0.7318, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17536824941635132, | |
| "step": 15, | |
| "valid_targets_mean": 5833.2, | |
| "valid_targets_min": 1211 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "grad_norm": 0.8871678160368911, | |
| "learning_rate": 9.620253164556963e-06, | |
| "loss": 0.6933, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18351516127586365, | |
| "step": 20, | |
| "valid_targets_mean": 6014.7, | |
| "valid_targets_min": 3717 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 0.7382530806327504, | |
| "learning_rate": 1.2151898734177216e-05, | |
| "loss": 0.6776, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1541610211133957, | |
| "step": 25, | |
| "valid_targets_mean": 5068.2, | |
| "valid_targets_min": 1347 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "grad_norm": 0.514129978379908, | |
| "learning_rate": 1.468354430379747e-05, | |
| "loss": 0.6547, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15168946981430054, | |
| "step": 30, | |
| "valid_targets_mean": 5006.1, | |
| "valid_targets_min": 881 | |
| }, | |
| { | |
| "epoch": 0.224, | |
| "grad_norm": 0.4336197541753502, | |
| "learning_rate": 1.7215189873417723e-05, | |
| "loss": 0.6208, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14679864048957825, | |
| "step": 35, | |
| "valid_targets_mean": 5579.1, | |
| "valid_targets_min": 1357 | |
| }, | |
| { | |
| "epoch": 0.256, | |
| "grad_norm": 0.36433161326142466, | |
| "learning_rate": 1.974683544303798e-05, | |
| "loss": 0.6069, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15517079830169678, | |
| "step": 40, | |
| "valid_targets_mean": 5445.0, | |
| "valid_targets_min": 1474 | |
| }, | |
| { | |
| "epoch": 0.288, | |
| "grad_norm": 0.32965171941859056, | |
| "learning_rate": 2.2278481012658228e-05, | |
| "loss": 0.5728, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14498282968997955, | |
| "step": 45, | |
| "valid_targets_mean": 6170.8, | |
| "valid_targets_min": 3922 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 0.2872624196401524, | |
| "learning_rate": 2.481012658227848e-05, | |
| "loss": 0.5765, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1473766267299652, | |
| "step": 50, | |
| "valid_targets_mean": 6663.2, | |
| "valid_targets_min": 4765 | |
| }, | |
| { | |
| "epoch": 0.352, | |
| "grad_norm": 0.27100975287589185, | |
| "learning_rate": 2.7341772151898737e-05, | |
| "loss": 0.5616, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14131703972816467, | |
| "step": 55, | |
| "valid_targets_mean": 5866.8, | |
| "valid_targets_min": 1425 | |
| }, | |
| { | |
| "epoch": 0.384, | |
| "grad_norm": 0.2637945073634897, | |
| "learning_rate": 2.987341772151899e-05, | |
| "loss": 0.5483, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13468310236930847, | |
| "step": 60, | |
| "valid_targets_mean": 5623.7, | |
| "valid_targets_min": 1606 | |
| }, | |
| { | |
| "epoch": 0.416, | |
| "grad_norm": 0.2609484255153037, | |
| "learning_rate": 3.240506329113924e-05, | |
| "loss": 0.5464, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10717941075563431, | |
| "step": 65, | |
| "valid_targets_mean": 5325.4, | |
| "valid_targets_min": 1780 | |
| }, | |
| { | |
| "epoch": 0.448, | |
| "grad_norm": 0.2475619693697882, | |
| "learning_rate": 3.49367088607595e-05, | |
| "loss": 0.5473, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14007271826267242, | |
| "step": 70, | |
| "valid_targets_mean": 5930.2, | |
| "valid_targets_min": 1506 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 0.24464093539865395, | |
| "learning_rate": 3.746835443037975e-05, | |
| "loss": 0.5233, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12224975228309631, | |
| "step": 75, | |
| "valid_targets_mean": 5512.3, | |
| "valid_targets_min": 2940 | |
| }, | |
| { | |
| "epoch": 0.512, | |
| "grad_norm": 0.22999667928124048, | |
| "learning_rate": 4e-05, | |
| "loss": 0.5253, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12176463007926941, | |
| "step": 80, | |
| "valid_targets_mean": 5304.3, | |
| "valid_targets_min": 3470 | |
| }, | |
| { | |
| "epoch": 0.544, | |
| "grad_norm": 0.23934636533674483, | |
| "learning_rate": 3.999504991751045e-05, | |
| "loss": 0.5338, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13291212916374207, | |
| "step": 85, | |
| "valid_targets_mean": 5400.6, | |
| "valid_targets_min": 1624 | |
| }, | |
| { | |
| "epoch": 0.576, | |
| "grad_norm": 0.22974570019649057, | |
| "learning_rate": 3.9980202120373464e-05, | |
| "loss": 0.5139, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12326923757791519, | |
| "step": 90, | |
| "valid_targets_mean": 4998.4, | |
| "valid_targets_min": 1247 | |
| }, | |
| { | |
| "epoch": 0.608, | |
| "grad_norm": 0.2926220288463877, | |
| "learning_rate": 3.995546395837111e-05, | |
| "loss": 0.5038, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1356104016304016, | |
| "step": 95, | |
| "valid_targets_mean": 5303.9, | |
| "valid_targets_min": 1326 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 0.23427791767385714, | |
| "learning_rate": 3.992084767709763e-05, | |
| "loss": 0.5066, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1405172497034073, | |
| "step": 100, | |
| "valid_targets_mean": 5991.0, | |
| "valid_targets_min": 3481 | |
| }, | |
| { | |
| "epoch": 0.672, | |
| "grad_norm": 0.28437672036074757, | |
| "learning_rate": 3.987637041189781e-05, | |
| "loss": 0.512, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1347738653421402, | |
| "step": 105, | |
| "valid_targets_mean": 6278.3, | |
| "valid_targets_min": 2344 | |
| }, | |
| { | |
| "epoch": 0.704, | |
| "grad_norm": 0.24924033321191602, | |
| "learning_rate": 3.982205417938482e-05, | |
| "loss": 0.4975, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11735053360462189, | |
| "step": 110, | |
| "valid_targets_mean": 5851.7, | |
| "valid_targets_min": 1129 | |
| }, | |
| { | |
| "epoch": 0.736, | |
| "grad_norm": 0.2507030670655199, | |
| "learning_rate": 3.975792586654179e-05, | |
| "loss": 0.5084, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12411265820264816, | |
| "step": 115, | |
| "valid_targets_mean": 5674.4, | |
| "valid_targets_min": 3051 | |
| }, | |
| { | |
| "epoch": 0.768, | |
| "grad_norm": 0.2500377635060955, | |
| "learning_rate": 3.968401721741259e-05, | |
| "loss": 0.5022, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12196642905473709, | |
| "step": 120, | |
| "valid_targets_mean": 6003.1, | |
| "valid_targets_min": 1515 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.24135329786073206, | |
| "learning_rate": 3.960036481738819e-05, | |
| "loss": 0.5008, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12246815860271454, | |
| "step": 125, | |
| "valid_targets_mean": 5971.2, | |
| "valid_targets_min": 2866 | |
| }, | |
| { | |
| "epoch": 0.832, | |
| "grad_norm": 0.22519547761877387, | |
| "learning_rate": 3.950701007509667e-05, | |
| "loss": 0.4981, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09827093780040741, | |
| "step": 130, | |
| "valid_targets_mean": 5276.2, | |
| "valid_targets_min": 1312 | |
| }, | |
| { | |
| "epoch": 0.864, | |
| "grad_norm": 0.24777405687491888, | |
| "learning_rate": 3.940399920190552e-05, | |
| "loss": 0.4924, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1192011833190918, | |
| "step": 135, | |
| "valid_targets_mean": 5829.4, | |
| "valid_targets_min": 1973 | |
| }, | |
| { | |
| "epoch": 0.896, | |
| "grad_norm": 0.2461001388229048, | |
| "learning_rate": 3.92913831890467e-05, | |
| "loss": 0.5026, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.127201110124588, | |
| "step": 140, | |
| "valid_targets_mean": 6177.5, | |
| "valid_targets_min": 3491 | |
| }, | |
| { | |
| "epoch": 0.928, | |
| "grad_norm": 0.26205526754490166, | |
| "learning_rate": 3.916921778237556e-05, | |
| "loss": 0.4921, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12378434836864471, | |
| "step": 145, | |
| "valid_targets_mean": 6577.8, | |
| "valid_targets_min": 2300 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.2527022155127535, | |
| "learning_rate": 3.903756345477612e-05, | |
| "loss": 0.4917, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11821117997169495, | |
| "step": 150, | |
| "valid_targets_mean": 5569.9, | |
| "valid_targets_min": 1507 | |
| }, | |
| { | |
| "epoch": 0.992, | |
| "grad_norm": 0.24869143664799934, | |
| "learning_rate": 3.889648537622657e-05, | |
| "loss": 0.482, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11581720411777496, | |
| "step": 155, | |
| "valid_targets_mean": 5312.9, | |
| "valid_targets_min": 2593 | |
| }, | |
| { | |
| "epoch": 1.0192, | |
| "grad_norm": 0.23417657797639116, | |
| "learning_rate": 3.874605338153952e-05, | |
| "loss": 0.4875, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1262468695640564, | |
| "step": 160, | |
| "valid_targets_mean": 6192.8, | |
| "valid_targets_min": 4127 | |
| }, | |
| { | |
| "epoch": 1.0512, | |
| "grad_norm": 0.27974191809541077, | |
| "learning_rate": 3.8586341935793265e-05, | |
| "loss": 0.4818, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12143966555595398, | |
| "step": 165, | |
| "valid_targets_mean": 5954.1, | |
| "valid_targets_min": 1743 | |
| }, | |
| { | |
| "epoch": 1.0832, | |
| "grad_norm": 0.25435642517002544, | |
| "learning_rate": 3.841743009747089e-05, | |
| "loss": 0.4815, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13246093690395355, | |
| "step": 170, | |
| "valid_targets_mean": 6993.4, | |
| "valid_targets_min": 3801 | |
| }, | |
| { | |
| "epoch": 1.1152, | |
| "grad_norm": 0.25881160063054937, | |
| "learning_rate": 3.8239401479325714e-05, | |
| "loss": 0.4693, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11434349417686462, | |
| "step": 175, | |
| "valid_targets_mean": 4978.4, | |
| "valid_targets_min": 1260 | |
| }, | |
| { | |
| "epoch": 1.1472, | |
| "grad_norm": 0.2278502886084013, | |
| "learning_rate": 3.8052344206992276e-05, | |
| "loss": 0.4787, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12233040481805801, | |
| "step": 180, | |
| "valid_targets_mean": 7108.7, | |
| "valid_targets_min": 2155 | |
| }, | |
| { | |
| "epoch": 1.1792, | |
| "grad_norm": 0.25565502704610965, | |
| "learning_rate": 3.7856350875363396e-05, | |
| "loss": 0.4801, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11371868848800659, | |
| "step": 185, | |
| "valid_targets_mean": 6057.6, | |
| "valid_targets_min": 3039 | |
| }, | |
| { | |
| "epoch": 1.2112, | |
| "grad_norm": 0.24238202719508706, | |
| "learning_rate": 3.765151850275497e-05, | |
| "loss": 0.475, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10759443044662476, | |
| "step": 190, | |
| "valid_targets_mean": 5236.0, | |
| "valid_targets_min": 1548 | |
| }, | |
| { | |
| "epoch": 1.2432, | |
| "grad_norm": 0.2707586320724312, | |
| "learning_rate": 3.7437948482881104e-05, | |
| "loss": 0.4838, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10973644256591797, | |
| "step": 195, | |
| "valid_targets_mean": 4837.2, | |
| "valid_targets_min": 1166 | |
| }, | |
| { | |
| "epoch": 1.2752, | |
| "grad_norm": 0.26143437324759183, | |
| "learning_rate": 3.721574653466336e-05, | |
| "loss": 0.4737, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13594186305999756, | |
| "step": 200, | |
| "valid_targets_mean": 5692.9, | |
| "valid_targets_min": 4103 | |
| }, | |
| { | |
| "epoch": 1.3072, | |
| "grad_norm": 0.26530637558666487, | |
| "learning_rate": 3.698502264989903e-05, | |
| "loss": 0.474, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0969756618142128, | |
| "step": 205, | |
| "valid_targets_mean": 4845.5, | |
| "valid_targets_min": 2870 | |
| }, | |
| { | |
| "epoch": 1.3392, | |
| "grad_norm": 0.24008064698409048, | |
| "learning_rate": 3.674589103881432e-05, | |
| "loss": 0.4667, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12318138033151627, | |
| "step": 210, | |
| "valid_targets_mean": 5510.6, | |
| "valid_targets_min": 1680 | |
| }, | |
| { | |
| "epoch": 1.3712, | |
| "grad_norm": 0.25529145100152656, | |
| "learning_rate": 3.64984700735293e-05, | |
| "loss": 0.4761, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12145581841468811, | |
| "step": 215, | |
| "valid_targets_mean": 5900.8, | |
| "valid_targets_min": 2659 | |
| }, | |
| { | |
| "epoch": 1.4032, | |
| "grad_norm": 0.25794769045722116, | |
| "learning_rate": 3.624288222946273e-05, | |
| "loss": 0.484, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1101190522313118, | |
| "step": 220, | |
| "valid_targets_mean": 5558.0, | |
| "valid_targets_min": 1262 | |
| }, | |
| { | |
| "epoch": 1.4352, | |
| "grad_norm": 0.2782121058872405, | |
| "learning_rate": 3.597925402470578e-05, | |
| "loss": 0.4809, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13891330361366272, | |
| "step": 225, | |
| "valid_targets_mean": 6163.1, | |
| "valid_targets_min": 2175 | |
| }, | |
| { | |
| "epoch": 1.4672, | |
| "grad_norm": 0.2511379752274597, | |
| "learning_rate": 3.570771595739445e-05, | |
| "loss": 0.4669, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1152762770652771, | |
| "step": 230, | |
| "valid_targets_mean": 6515.6, | |
| "valid_targets_min": 1578 | |
| }, | |
| { | |
| "epoch": 1.4992, | |
| "grad_norm": 0.27590481971133046, | |
| "learning_rate": 3.5428402441111964e-05, | |
| "loss": 0.4806, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12730714678764343, | |
| "step": 235, | |
| "valid_targets_mean": 5110.8, | |
| "valid_targets_min": 1657 | |
| }, | |
| { | |
| "epoch": 1.5312000000000001, | |
| "grad_norm": 0.254323226627503, | |
| "learning_rate": 3.5141451738352936e-05, | |
| "loss": 0.4659, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10027951002120972, | |
| "step": 240, | |
| "valid_targets_mean": 5534.1, | |
| "valid_targets_min": 891 | |
| }, | |
| { | |
| "epoch": 1.5632000000000001, | |
| "grad_norm": 0.2580174702832105, | |
| "learning_rate": 3.4847005892082266e-05, | |
| "loss": 0.4675, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11096903681755066, | |
| "step": 245, | |
| "valid_targets_mean": 5662.7, | |
| "valid_targets_min": 1644 | |
| }, | |
| { | |
| "epoch": 1.5952, | |
| "grad_norm": 0.30698403507646943, | |
| "learning_rate": 3.454521065542273e-05, | |
| "loss": 0.4859, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1288014054298401, | |
| "step": 250, | |
| "valid_targets_mean": 5168.7, | |
| "valid_targets_min": 1162 | |
| }, | |
| { | |
| "epoch": 1.6272, | |
| "grad_norm": 0.25805156829059184, | |
| "learning_rate": 3.423621541950597e-05, | |
| "loss": 0.464, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.129526287317276, | |
| "step": 255, | |
| "valid_targets_mean": 7147.4, | |
| "valid_targets_min": 3884 | |
| }, | |
| { | |
| "epoch": 1.6592, | |
| "grad_norm": 0.23616819648599033, | |
| "learning_rate": 3.3920173139522664e-05, | |
| "loss": 0.4673, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11364764720201492, | |
| "step": 260, | |
| "valid_targets_mean": 5991.1, | |
| "valid_targets_min": 1280 | |
| }, | |
| { | |
| "epoch": 1.6912, | |
| "grad_norm": 0.2810806381450421, | |
| "learning_rate": 3.35972402590084e-05, | |
| "loss": 0.471, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1247754693031311, | |
| "step": 265, | |
| "valid_targets_mean": 5614.1, | |
| "valid_targets_min": 1199 | |
| }, | |
| { | |
| "epoch": 1.7231999999999998, | |
| "grad_norm": 0.2529117867416003, | |
| "learning_rate": 3.326757663240291e-05, | |
| "loss": 0.4793, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1115766316652298, | |
| "step": 270, | |
| "valid_targets_mean": 5206.1, | |
| "valid_targets_min": 1946 | |
| }, | |
| { | |
| "epoch": 1.7551999999999999, | |
| "grad_norm": 0.24073937834464354, | |
| "learning_rate": 3.293134544592073e-05, | |
| "loss": 0.463, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12958145141601562, | |
| "step": 275, | |
| "valid_targets_mean": 6616.4, | |
| "valid_targets_min": 3579 | |
| }, | |
| { | |
| "epoch": 1.7872, | |
| "grad_norm": 0.23555835370029385, | |
| "learning_rate": 3.258871313677274e-05, | |
| "loss": 0.4677, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13404905796051025, | |
| "step": 280, | |
| "valid_targets_mean": 7205.4, | |
| "valid_targets_min": 1356 | |
| }, | |
| { | |
| "epoch": 1.8192, | |
| "grad_norm": 0.2513641254982323, | |
| "learning_rate": 3.2239849310778316e-05, | |
| "loss": 0.4636, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1133008599281311, | |
| "step": 285, | |
| "valid_targets_mean": 5637.1, | |
| "valid_targets_min": 3779 | |
| }, | |
| { | |
| "epoch": 1.8512, | |
| "grad_norm": 0.22918433139423086, | |
| "learning_rate": 3.188492665840909e-05, | |
| "loss": 0.4649, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10168616473674774, | |
| "step": 290, | |
| "valid_targets_mean": 5500.5, | |
| "valid_targets_min": 1523 | |
| }, | |
| { | |
| "epoch": 1.8832, | |
| "grad_norm": 0.25315031376778907, | |
| "learning_rate": 3.1524120869305726e-05, | |
| "loss": 0.4538, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11145422607660294, | |
| "step": 295, | |
| "valid_targets_mean": 4931.9, | |
| "valid_targets_min": 1168 | |
| }, | |
| { | |
| "epoch": 1.9152, | |
| "grad_norm": 0.22105397485658976, | |
| "learning_rate": 3.11576105453101e-05, | |
| "loss": 0.4589, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11858774721622467, | |
| "step": 300, | |
| "valid_targets_mean": 6575.4, | |
| "valid_targets_min": 4043 | |
| }, | |
| { | |
| "epoch": 1.9472, | |
| "grad_norm": 0.2668714334524866, | |
| "learning_rate": 3.0785577112055916e-05, | |
| "loss": 0.467, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1110314130783081, | |
| "step": 305, | |
| "valid_targets_mean": 4943.4, | |
| "valid_targets_min": 3259 | |
| }, | |
| { | |
| "epoch": 1.9792, | |
| "grad_norm": 0.27142228891732906, | |
| "learning_rate": 3.040820472916153e-05, | |
| "loss": 0.474, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10838167369365692, | |
| "step": 310, | |
| "valid_targets_mean": 5198.4, | |
| "valid_targets_min": 1470 | |
| }, | |
| { | |
| "epoch": 2.0064, | |
| "grad_norm": 0.2567890914244406, | |
| "learning_rate": 3.002568019906939e-05, | |
| "loss": 0.463, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1107785701751709, | |
| "step": 315, | |
| "valid_targets_mean": 5847.9, | |
| "valid_targets_min": 2018 | |
| }, | |
| { | |
| "epoch": 2.0384, | |
| "grad_norm": 0.2523608373300584, | |
| "learning_rate": 2.963819287457733e-05, | |
| "loss": 0.4488, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10997898876667023, | |
| "step": 320, | |
| "valid_targets_mean": 5916.2, | |
| "valid_targets_min": 2413 | |
| }, | |
| { | |
| "epoch": 2.0704, | |
| "grad_norm": 0.23463415840591811, | |
| "learning_rate": 2.924593456510733e-05, | |
| "loss": 0.4674, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10450360178947449, | |
| "step": 325, | |
| "valid_targets_mean": 5486.4, | |
| "valid_targets_min": 1264 | |
| }, | |
| { | |
| "epoch": 2.1024, | |
| "grad_norm": 0.24843951325249042, | |
| "learning_rate": 2.8849099441758306e-05, | |
| "loss": 0.4558, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11403744667768478, | |
| "step": 330, | |
| "valid_targets_mean": 5439.9, | |
| "valid_targets_min": 1138 | |
| }, | |
| { | |
| "epoch": 2.1344, | |
| "grad_norm": 0.24610362358361257, | |
| "learning_rate": 2.844788394118979e-05, | |
| "loss": 0.453, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11807750910520554, | |
| "step": 335, | |
| "valid_targets_mean": 5861.1, | |
| "valid_targets_min": 1565 | |
| }, | |
| { | |
| "epoch": 2.1664, | |
| "grad_norm": 0.33040794254123873, | |
| "learning_rate": 2.8042486668384164e-05, | |
| "loss": 0.4545, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11304843425750732, | |
| "step": 340, | |
| "valid_targets_mean": 6610.1, | |
| "valid_targets_min": 2124 | |
| }, | |
| { | |
| "epoch": 2.1984, | |
| "grad_norm": 0.25162468452460407, | |
| "learning_rate": 2.7633108298335582e-05, | |
| "loss": 0.4554, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11378766596317291, | |
| "step": 345, | |
| "valid_targets_mean": 5402.4, | |
| "valid_targets_min": 1247 | |
| }, | |
| { | |
| "epoch": 2.2304, | |
| "grad_norm": 0.25754716013199486, | |
| "learning_rate": 2.721995147671416e-05, | |
| "loss": 0.4466, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10926654934883118, | |
| "step": 350, | |
| "valid_targets_mean": 6230.7, | |
| "valid_targets_min": 1159 | |
| }, | |
| { | |
| "epoch": 2.2624, | |
| "grad_norm": 0.25363357284595456, | |
| "learning_rate": 2.68032207195547e-05, | |
| "loss": 0.4655, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12160837650299072, | |
| "step": 355, | |
| "valid_targets_mean": 5901.7, | |
| "valid_targets_min": 1400 | |
| }, | |
| { | |
| "epoch": 2.2944, | |
| "grad_norm": 0.2371953068408656, | |
| "learning_rate": 2.6383122312019604e-05, | |
| "loss": 0.4593, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10440811514854431, | |
| "step": 360, | |
| "valid_targets_mean": 5618.4, | |
| "valid_targets_min": 2764 | |
| }, | |
| { | |
| "epoch": 2.3264, | |
| "grad_norm": 0.24670634376756886, | |
| "learning_rate": 2.595986420628597e-05, | |
| "loss": 0.4554, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11458228528499603, | |
| "step": 365, | |
| "valid_targets_mean": 5704.5, | |
| "valid_targets_min": 1938 | |
| }, | |
| { | |
| "epoch": 2.3584, | |
| "grad_norm": 0.23895194020127508, | |
| "learning_rate": 2.5533655918607573e-05, | |
| "loss": 0.4585, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10346005856990814, | |
| "step": 370, | |
| "valid_targets_mean": 5528.0, | |
| "valid_targets_min": 1275 | |
| }, | |
| { | |
| "epoch": 2.3904, | |
| "grad_norm": 0.2408178690827922, | |
| "learning_rate": 2.510470842560259e-05, | |
| "loss": 0.4519, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11147673428058624, | |
| "step": 375, | |
| "valid_targets_mean": 5869.6, | |
| "valid_targets_min": 1637 | |
| }, | |
| { | |
| "epoch": 2.4224, | |
| "grad_norm": 0.23255787756846094, | |
| "learning_rate": 2.467323405981841e-05, | |
| "loss": 0.4607, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10699373483657837, | |
| "step": 380, | |
| "valid_targets_mean": 5419.7, | |
| "valid_targets_min": 2956 | |
| }, | |
| { | |
| "epoch": 2.4544, | |
| "grad_norm": 0.24230598627352132, | |
| "learning_rate": 2.423944640462533e-05, | |
| "loss": 0.4633, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10441893339157104, | |
| "step": 385, | |
| "valid_targets_mean": 4641.1, | |
| "valid_targets_min": 1544 | |
| }, | |
| { | |
| "epoch": 2.4864, | |
| "grad_norm": 0.24609359447675086, | |
| "learning_rate": 2.3803560188490968e-05, | |
| "loss": 0.4507, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13390594720840454, | |
| "step": 390, | |
| "valid_targets_mean": 6129.4, | |
| "valid_targets_min": 1506 | |
| }, | |
| { | |
| "epoch": 2.5183999999999997, | |
| "grad_norm": 0.2450309981216017, | |
| "learning_rate": 2.336579117868789e-05, | |
| "loss": 0.4505, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09820785373449326, | |
| "step": 395, | |
| "valid_targets_mean": 4850.6, | |
| "valid_targets_min": 1443 | |
| }, | |
| { | |
| "epoch": 2.5504, | |
| "grad_norm": 0.26353410163682456, | |
| "learning_rate": 2.292635607448711e-05, | |
| "loss": 0.4558, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12506261467933655, | |
| "step": 400, | |
| "valid_targets_mean": 6348.2, | |
| "valid_targets_min": 3938 | |
| }, | |
| { | |
| "epoch": 2.5824, | |
| "grad_norm": 0.2591867666837128, | |
| "learning_rate": 2.248547239989008e-05, | |
| "loss": 0.4644, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1142532080411911, | |
| "step": 405, | |
| "valid_targets_mean": 5473.3, | |
| "valid_targets_min": 3717 | |
| }, | |
| { | |
| "epoch": 2.6144, | |
| "grad_norm": 0.23939041830365473, | |
| "learning_rate": 2.204335839595255e-05, | |
| "loss": 0.4579, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.126010924577713, | |
| "step": 410, | |
| "valid_targets_mean": 7091.4, | |
| "valid_targets_min": 2590 | |
| }, | |
| { | |
| "epoch": 2.6464, | |
| "grad_norm": 0.27523073180915464, | |
| "learning_rate": 2.1600232912753452e-05, | |
| "loss": 0.4524, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12722241878509521, | |
| "step": 415, | |
| "valid_targets_mean": 5683.2, | |
| "valid_targets_min": 1399 | |
| }, | |
| { | |
| "epoch": 2.6784, | |
| "grad_norm": 0.2555993394486925, | |
| "learning_rate": 2.1156315301062293e-05, | |
| "loss": 0.4419, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10284954309463501, | |
| "step": 420, | |
| "valid_targets_mean": 5005.5, | |
| "valid_targets_min": 934 | |
| }, | |
| { | |
| "epoch": 2.7104, | |
| "grad_norm": 0.26731870473795194, | |
| "learning_rate": 2.0711825303758712e-05, | |
| "loss": 0.4433, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11506682634353638, | |
| "step": 425, | |
| "valid_targets_mean": 6046.0, | |
| "valid_targets_min": 1647 | |
| }, | |
| { | |
| "epoch": 2.7424, | |
| "grad_norm": 0.2407561884924848, | |
| "learning_rate": 2.0266982947057962e-05, | |
| "loss": 0.4559, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10325722396373749, | |
| "step": 430, | |
| "valid_targets_mean": 5949.6, | |
| "valid_targets_min": 2688 | |
| }, | |
| { | |
| "epoch": 2.7744, | |
| "grad_norm": 0.2336404714487898, | |
| "learning_rate": 1.9822008431596083e-05, | |
| "loss": 0.4522, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10586457699537277, | |
| "step": 435, | |
| "valid_targets_mean": 6859.5, | |
| "valid_targets_min": 4222 | |
| }, | |
| { | |
| "epoch": 2.8064, | |
| "grad_norm": 0.22137813612500373, | |
| "learning_rate": 1.937712202342881e-05, | |
| "loss": 0.4408, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12141789495944977, | |
| "step": 440, | |
| "valid_targets_mean": 6645.0, | |
| "valid_targets_min": 3372 | |
| }, | |
| { | |
| "epoch": 2.8384, | |
| "grad_norm": 0.2361975809490191, | |
| "learning_rate": 1.8932543944998037e-05, | |
| "loss": 0.4621, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11469966918230057, | |
| "step": 445, | |
| "valid_targets_mean": 6750.8, | |
| "valid_targets_min": 3870 | |
| }, | |
| { | |
| "epoch": 2.8704, | |
| "grad_norm": 0.2569963385739716, | |
| "learning_rate": 1.8488494266119877e-05, | |
| "loss": 0.4558, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10432308912277222, | |
| "step": 450, | |
| "valid_targets_mean": 5330.1, | |
| "valid_targets_min": 1132 | |
| }, | |
| { | |
| "epoch": 2.9024, | |
| "grad_norm": 0.23032242260685118, | |
| "learning_rate": 1.804519279504834e-05, | |
| "loss": 0.4526, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11718428134918213, | |
| "step": 455, | |
| "valid_targets_mean": 6316.1, | |
| "valid_targets_min": 881 | |
| }, | |
| { | |
| "epoch": 2.9344, | |
| "grad_norm": 0.23444798682474463, | |
| "learning_rate": 1.7602858969668365e-05, | |
| "loss": 0.4619, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10966004431247711, | |
| "step": 460, | |
| "valid_targets_mean": 5196.4, | |
| "valid_targets_min": 1548 | |
| }, | |
| { | |
| "epoch": 2.9664, | |
| "grad_norm": 0.24127707453224004, | |
| "learning_rate": 1.716171174887231e-05, | |
| "loss": 0.4417, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10286953300237656, | |
| "step": 465, | |
| "valid_targets_mean": 5809.4, | |
| "valid_targets_min": 1703 | |
| }, | |
| { | |
| "epoch": 2.9984, | |
| "grad_norm": 0.24085134696567514, | |
| "learning_rate": 1.6721969504173484e-05, | |
| "loss": 0.443, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11106819659471512, | |
| "step": 470, | |
| "valid_targets_mean": 5545.3, | |
| "valid_targets_min": 1533 | |
| }, | |
| { | |
| "epoch": 3.0256, | |
| "grad_norm": 0.2541813174463172, | |
| "learning_rate": 1.628384991161041e-05, | |
| "loss": 0.4528, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11498390138149261, | |
| "step": 475, | |
| "valid_targets_mean": 5463.5, | |
| "valid_targets_min": 1925 | |
| }, | |
| { | |
| "epoch": 3.0576, | |
| "grad_norm": 0.2353622062322344, | |
| "learning_rate": 1.5847569843995452e-05, | |
| "loss": 0.4449, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1087237298488617, | |
| "step": 480, | |
| "valid_targets_mean": 5840.9, | |
| "valid_targets_min": 1507 | |
| }, | |
| { | |
| "epoch": 3.0896, | |
| "grad_norm": 0.22580596321042834, | |
| "learning_rate": 1.5413345263560922e-05, | |
| "loss": 0.4421, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11730214208364487, | |
| "step": 485, | |
| "valid_targets_mean": 7615.9, | |
| "valid_targets_min": 4895 | |
| }, | |
| { | |
| "epoch": 3.1216, | |
| "grad_norm": 0.2346986705939085, | |
| "learning_rate": 1.4981391115056032e-05, | |
| "loss": 0.4484, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08560355007648468, | |
| "step": 490, | |
| "valid_targets_mean": 5496.8, | |
| "valid_targets_min": 2426 | |
| }, | |
| { | |
| "epoch": 3.1536, | |
| "grad_norm": 0.2474046202773989, | |
| "learning_rate": 1.455192121934748e-05, | |
| "loss": 0.4392, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10706610977649689, | |
| "step": 495, | |
| "valid_targets_mean": 6306.4, | |
| "valid_targets_min": 1468 | |
| }, | |
| { | |
| "epoch": 3.1856, | |
| "grad_norm": 0.24598088386643885, | |
| "learning_rate": 1.4125148167576303e-05, | |
| "loss": 0.4517, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0977165475487709, | |
| "step": 500, | |
| "valid_targets_mean": 5140.1, | |
| "valid_targets_min": 2124 | |
| }, | |
| { | |
| "epoch": 3.2176, | |
| "grad_norm": 0.23772157152624504, | |
| "learning_rate": 1.3701283215923563e-05, | |
| "loss": 0.4515, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12374231219291687, | |
| "step": 505, | |
| "valid_targets_mean": 6650.8, | |
| "valid_targets_min": 2146 | |
| }, | |
| { | |
| "epoch": 3.2496, | |
| "grad_norm": 0.2381861982363929, | |
| "learning_rate": 1.328053618103677e-05, | |
| "loss": 0.4524, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08905310928821564, | |
| "step": 510, | |
| "valid_targets_mean": 4238.1, | |
| "valid_targets_min": 1246 | |
| }, | |
| { | |
| "epoch": 3.2816, | |
| "grad_norm": 0.24129917399571515, | |
| "learning_rate": 1.2863115336168916e-05, | |
| "loss": 0.4424, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10858754813671112, | |
| "step": 515, | |
| "valid_targets_mean": 5508.2, | |
| "valid_targets_min": 1990 | |
| }, | |
| { | |
| "epoch": 3.3136, | |
| "grad_norm": 0.2299768195746735, | |
| "learning_rate": 1.2449227308081509e-05, | |
| "loss": 0.4305, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10494785010814667, | |
| "step": 520, | |
| "valid_targets_mean": 5587.0, | |
| "valid_targets_min": 1414 | |
| }, | |
| { | |
| "epoch": 3.3456, | |
| "grad_norm": 0.22756287907781986, | |
| "learning_rate": 1.2039076974762587e-05, | |
| "loss": 0.4412, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09129714220762253, | |
| "step": 525, | |
| "valid_targets_mean": 5011.6, | |
| "valid_targets_min": 1275 | |
| }, | |
| { | |
| "epoch": 3.3776, | |
| "grad_norm": 0.2490236621582409, | |
| "learning_rate": 1.163286736401044e-05, | |
| "loss": 0.4353, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10486893355846405, | |
| "step": 530, | |
| "valid_targets_mean": 5423.1, | |
| "valid_targets_min": 1137 | |
| }, | |
| { | |
| "epoch": 3.4096, | |
| "grad_norm": 0.22017430759615417, | |
| "learning_rate": 1.123079955293322e-05, | |
| "loss": 0.4427, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1027197316288948, | |
| "step": 535, | |
| "valid_targets_mean": 5919.5, | |
| "valid_targets_min": 1816 | |
| }, | |
| { | |
| "epoch": 3.4416, | |
| "grad_norm": 0.2140066307199504, | |
| "learning_rate": 1.0833072568414037e-05, | |
| "loss": 0.4454, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11336848139762878, | |
| "step": 540, | |
| "valid_targets_mean": 7503.7, | |
| "valid_targets_min": 4812 | |
| }, | |
| { | |
| "epoch": 3.4736000000000002, | |
| "grad_norm": 0.23150362740195682, | |
| "learning_rate": 1.0439883288591057e-05, | |
| "loss": 0.4561, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11124958097934723, | |
| "step": 545, | |
| "valid_targets_mean": 6104.5, | |
| "valid_targets_min": 3593 | |
| }, | |
| { | |
| "epoch": 3.5056000000000003, | |
| "grad_norm": 0.2239460927463848, | |
| "learning_rate": 1.0051426345401202e-05, | |
| "loss": 0.4383, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08680635690689087, | |
| "step": 550, | |
| "valid_targets_mean": 4237.5, | |
| "valid_targets_min": 1464 | |
| }, | |
| { | |
| "epoch": 3.5376, | |
| "grad_norm": 0.2276675294214827, | |
| "learning_rate": 9.667894028235704e-06, | |
| "loss": 0.4512, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12256583571434021, | |
| "step": 555, | |
| "valid_targets_mean": 6583.3, | |
| "valid_targets_min": 3849 | |
| }, | |
| { | |
| "epoch": 3.5696, | |
| "grad_norm": 0.23874452325722306, | |
| "learning_rate": 9.289476188755315e-06, | |
| "loss": 0.4354, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10612036287784576, | |
| "step": 560, | |
| "valid_targets_mean": 6009.8, | |
| "valid_targets_min": 2688 | |
| }, | |
| { | |
| "epoch": 3.6016, | |
| "grad_norm": 0.21514329189036926, | |
| "learning_rate": 8.916360146912122e-06, | |
| "loss": 0.4469, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11689209938049316, | |
| "step": 565, | |
| "valid_targets_mean": 6188.1, | |
| "valid_targets_min": 3993 | |
| }, | |
| { | |
| "epoch": 3.6336, | |
| "grad_norm": 0.2362020094484568, | |
| "learning_rate": 8.548730598224646e-06, | |
| "loss": 0.4484, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11842970550060272, | |
| "step": 570, | |
| "valid_targets_mean": 5449.2, | |
| "valid_targets_min": 1275 | |
| }, | |
| { | |
| "epoch": 3.6656, | |
| "grad_norm": 0.2401716944941585, | |
| "learning_rate": 8.186769522352053e-06, | |
| "loss": 0.4512, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11459657549858093, | |
| "step": 575, | |
| "valid_targets_mean": 5901.6, | |
| "valid_targets_min": 3112 | |
| }, | |
| { | |
| "epoch": 3.6976, | |
| "grad_norm": 0.2260946660210682, | |
| "learning_rate": 7.830656093012714e-06, | |
| "loss": 0.45, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11391419172286987, | |
| "step": 580, | |
| "valid_targets_mean": 6314.2, | |
| "valid_targets_min": 2536 | |
| }, | |
| { | |
| "epoch": 3.7296, | |
| "grad_norm": 0.23490628110150955, | |
| "learning_rate": 7.480566589291696e-06, | |
| "loss": 0.4402, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1118554174900055, | |
| "step": 585, | |
| "valid_targets_mean": 6000.4, | |
| "valid_targets_min": 2420 | |
| }, | |
| { | |
| "epoch": 3.7616, | |
| "grad_norm": 0.22394540910322736, | |
| "learning_rate": 7.1366743083812285e-06, | |
| "loss": 0.4336, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11416637897491455, | |
| "step": 590, | |
| "valid_targets_mean": 5651.9, | |
| "valid_targets_min": 1312 | |
| }, | |
| { | |
| "epoch": 3.7936, | |
| "grad_norm": 0.23434924832059395, | |
| "learning_rate": 6.799149479797101e-06, | |
| "loss": 0.4524, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1163535937666893, | |
| "step": 595, | |
| "valid_targets_mean": 6361.0, | |
| "valid_targets_min": 3963 | |
| }, | |
| { | |
| "epoch": 3.8256, | |
| "grad_norm": 0.20434011194504567, | |
| "learning_rate": 6.4681591811137e-06, | |
| "loss": 0.431, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10242234170436859, | |
| "step": 600, | |
| "valid_targets_mean": 6733.9, | |
| "valid_targets_min": 2050 | |
| }, | |
| { | |
| "epoch": 3.8576, | |
| "grad_norm": 0.2205683173173195, | |
| "learning_rate": 6.143867255259197e-06, | |
| "loss": 0.4469, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10782508552074432, | |
| "step": 605, | |
| "valid_targets_mean": 5887.1, | |
| "valid_targets_min": 1264 | |
| }, | |
| { | |
| "epoch": 3.8895999999999997, | |
| "grad_norm": 0.22196701228922652, | |
| "learning_rate": 5.8264342294119504e-06, | |
| "loss": 0.4507, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09781044721603394, | |
| "step": 610, | |
| "valid_targets_mean": 6692.9, | |
| "valid_targets_min": 3335 | |
| }, | |
| { | |
| "epoch": 3.9215999999999998, | |
| "grad_norm": 0.23419677990052767, | |
| "learning_rate": 5.516017235538258e-06, | |
| "loss": 0.4398, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.088894322514534, | |
| "step": 615, | |
| "valid_targets_mean": 5359.7, | |
| "valid_targets_min": 1567 | |
| }, | |
| { | |
| "epoch": 3.9536, | |
| "grad_norm": 0.20991276705753859, | |
| "learning_rate": 5.212769932610695e-06, | |
| "loss": 0.4465, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08706802129745483, | |
| "step": 620, | |
| "valid_targets_mean": 5569.8, | |
| "valid_targets_min": 1280 | |
| }, | |
| { | |
| "epoch": 3.9856, | |
| "grad_norm": 0.2305681012598391, | |
| "learning_rate": 4.916842430545681e-06, | |
| "loss": 0.4464, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10766085982322693, | |
| "step": 625, | |
| "valid_targets_mean": 6046.6, | |
| "valid_targets_min": 2956 | |
| }, | |
| { | |
| "epoch": 4.0128, | |
| "grad_norm": 0.23248030070161385, | |
| "learning_rate": 4.628381215897837e-06, | |
| "loss": 0.4368, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09869658201932907, | |
| "step": 630, | |
| "valid_targets_mean": 5463.2, | |
| "valid_targets_min": 1626 | |
| }, | |
| { | |
| "epoch": 4.0448, | |
| "grad_norm": 0.2320693470818531, | |
| "learning_rate": 4.347529079347914e-06, | |
| "loss": 0.4401, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12055571377277374, | |
| "step": 635, | |
| "valid_targets_mean": 5899.8, | |
| "valid_targets_min": 4103 | |
| }, | |
| { | |
| "epoch": 4.0768, | |
| "grad_norm": 0.2376324397286371, | |
| "learning_rate": 4.074425045020247e-06, | |
| "loss": 0.4506, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10627171397209167, | |
| "step": 640, | |
| "valid_targets_mean": 4855.4, | |
| "valid_targets_min": 1443 | |
| }, | |
| { | |
| "epoch": 4.1088, | |
| "grad_norm": 0.23365976592481544, | |
| "learning_rate": 3.8092043016646487e-06, | |
| "loss": 0.4392, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11408081650733948, | |
| "step": 645, | |
| "valid_targets_mean": 5649.8, | |
| "valid_targets_min": 1275 | |
| }, | |
| { | |
| "epoch": 4.1408, | |
| "grad_norm": 0.21456883184147094, | |
| "learning_rate": 3.551998135736867e-06, | |
| "loss": 0.4316, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10515138506889343, | |
| "step": 650, | |
| "valid_targets_mean": 5795.8, | |
| "valid_targets_min": 1994 | |
| }, | |
| { | |
| "epoch": 4.1728, | |
| "grad_norm": 0.23159726143523057, | |
| "learning_rate": 3.3029338664107267e-06, | |
| "loss": 0.4436, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12827849388122559, | |
| "step": 655, | |
| "valid_targets_mean": 6799.2, | |
| "valid_targets_min": 4330 | |
| }, | |
| { | |
| "epoch": 4.2048, | |
| "grad_norm": 0.2267271521046096, | |
| "learning_rate": 3.0621347825540625e-06, | |
| "loss": 0.4335, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11244725435972214, | |
| "step": 660, | |
| "valid_targets_mean": 5340.2, | |
| "valid_targets_min": 2584 | |
| }, | |
| { | |
| "epoch": 4.2368, | |
| "grad_norm": 0.20149080037204617, | |
| "learning_rate": 2.8297200816997183e-06, | |
| "loss": 0.4342, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09943097829818726, | |
| "step": 665, | |
| "valid_targets_mean": 5967.9, | |
| "valid_targets_min": 1349 | |
| }, | |
| { | |
| "epoch": 4.2688, | |
| "grad_norm": 0.22316435861473205, | |
| "learning_rate": 2.605804811041803e-06, | |
| "loss": 0.4475, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1142878532409668, | |
| "step": 670, | |
| "valid_targets_mean": 5788.6, | |
| "valid_targets_min": 1504 | |
| }, | |
| { | |
| "epoch": 4.3008, | |
| "grad_norm": 0.23749043061782324, | |
| "learning_rate": 2.390499810486351e-06, | |
| "loss": 0.4398, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09940489381551743, | |
| "step": 675, | |
| "valid_targets_mean": 5341.0, | |
| "valid_targets_min": 1211 | |
| }, | |
| { | |
| "epoch": 4.3328, | |
| "grad_norm": 0.21705720222967442, | |
| "learning_rate": 2.183911657784685e-06, | |
| "loss": 0.4384, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10698309540748596, | |
| "step": 680, | |
| "valid_targets_mean": 6206.0, | |
| "valid_targets_min": 1104 | |
| }, | |
| { | |
| "epoch": 4.3648, | |
| "grad_norm": 0.2359689779618734, | |
| "learning_rate": 1.986142615776532e-06, | |
| "loss": 0.4354, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12712863087654114, | |
| "step": 685, | |
| "valid_targets_mean": 6414.2, | |
| "valid_targets_min": 2090 | |
| }, | |
| { | |
| "epoch": 4.3968, | |
| "grad_norm": 0.2270181783968223, | |
| "learning_rate": 1.7972905817690644e-06, | |
| "loss": 0.4482, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11009643971920013, | |
| "step": 690, | |
| "valid_targets_mean": 5408.8, | |
| "valid_targets_min": 1740 | |
| }, | |
| { | |
| "epoch": 4.4288, | |
| "grad_norm": 0.1982686366700752, | |
| "learning_rate": 1.617449039076955e-06, | |
| "loss": 0.4467, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11948183923959732, | |
| "step": 695, | |
| "valid_targets_mean": 8110.3, | |
| "valid_targets_min": 1565 | |
| }, | |
| { | |
| "epoch": 4.4608, | |
| "grad_norm": 0.22182051002480957, | |
| "learning_rate": 1.4467070107473413e-06, | |
| "loss": 0.4407, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09709776937961578, | |
| "step": 700, | |
| "valid_targets_mean": 5159.9, | |
| "valid_targets_min": 3161 | |
| }, | |
| { | |
| "epoch": 4.4928, | |
| "grad_norm": 0.21510025780610673, | |
| "learning_rate": 1.2851490154926816e-06, | |
| "loss": 0.4475, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11582086980342865, | |
| "step": 705, | |
| "valid_targets_mean": 6414.6, | |
| "valid_targets_min": 1497 | |
| }, | |
| { | |
| "epoch": 4.5248, | |
| "grad_norm": 0.20748343105814038, | |
| "learning_rate": 1.1328550258533211e-06, | |
| "loss": 0.4444, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12172878533601761, | |
| "step": 710, | |
| "valid_targets_mean": 6439.6, | |
| "valid_targets_min": 3898 | |
| }, | |
| { | |
| "epoch": 4.5568, | |
| "grad_norm": 0.21381893999640084, | |
| "learning_rate": 9.899004286103953e-07, | |
| "loss": 0.4367, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10774854570627213, | |
| "step": 715, | |
| "valid_targets_mean": 6118.2, | |
| "valid_targets_min": 1434 | |
| }, | |
| { | |
| "epoch": 4.5888, | |
| "grad_norm": 0.22561568200277468, | |
| "learning_rate": 8.5635598746876e-07, | |
| "loss": 0.4344, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11041155457496643, | |
| "step": 720, | |
| "valid_targets_mean": 6051.2, | |
| "valid_targets_min": 3805 | |
| }, | |
| { | |
| "epoch": 4.6208, | |
| "grad_norm": 0.20402603591723495, | |
| "learning_rate": 7.32287808028389e-07, | |
| "loss": 0.4338, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11558706313371658, | |
| "step": 725, | |
| "valid_targets_mean": 6586.0, | |
| "valid_targets_min": 4236 | |
| }, | |
| { | |
| "epoch": 4.6528, | |
| "grad_norm": 0.24563597589147643, | |
| "learning_rate": 6.177573050615327e-07, | |
| "loss": 0.4304, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09163513034582138, | |
| "step": 730, | |
| "valid_targets_mean": 4575.3, | |
| "valid_targets_min": 1257 | |
| }, | |
| { | |
| "epoch": 4.6848, | |
| "grad_norm": 0.21645901836986417, | |
| "learning_rate": 5.128211721119213e-07, | |
| "loss": 0.4337, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11745560169219971, | |
| "step": 735, | |
| "valid_targets_mean": 6024.5, | |
| "valid_targets_min": 1402 | |
| }, | |
| { | |
| "epoch": 4.7168, | |
| "grad_norm": 0.21851740632245759, | |
| "learning_rate": 4.175313534309755e-07, | |
| "loss": 0.4366, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10883457958698273, | |
| "step": 740, | |
| "valid_targets_mean": 6283.2, | |
| "valid_targets_min": 1555 | |
| }, | |
| { | |
| "epoch": 4.7488, | |
| "grad_norm": 0.21932736290236435, | |
| "learning_rate": 3.319350182649861e-07, | |
| "loss": 0.4468, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10903538763523102, | |
| "step": 745, | |
| "valid_targets_mean": 6333.0, | |
| "valid_targets_min": 1504 | |
| }, | |
| { | |
| "epoch": 4.7808, | |
| "grad_norm": 0.22616118624010112, | |
| "learning_rate": 2.560745375059392e-07, | |
| "loss": 0.4382, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11657419800758362, | |
| "step": 750, | |
| "valid_targets_mean": 7166.1, | |
| "valid_targets_min": 3048 | |
| }, | |
| { | |
| "epoch": 4.8128, | |
| "grad_norm": 0.22643827545429354, | |
| "learning_rate": 1.8998746271758016e-07, | |
| "loss": 0.4369, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10837658494710922, | |
| "step": 755, | |
| "valid_targets_mean": 5800.5, | |
| "valid_targets_min": 3765 | |
| }, | |
| { | |
| "epoch": 4.8448, | |
| "grad_norm": 0.21056524475735883, | |
| "learning_rate": 1.337065075470778e-07, | |
| "loss": 0.4389, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10905088484287262, | |
| "step": 760, | |
| "valid_targets_mean": 6031.9, | |
| "valid_targets_min": 3851 | |
| }, | |
| { | |
| "epoch": 4.8768, | |
| "grad_norm": 0.22673914171528292, | |
| "learning_rate": 8.725953153150279e-08, | |
| "loss": 0.4299, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11067554354667664, | |
| "step": 765, | |
| "valid_targets_mean": 5955.0, | |
| "valid_targets_min": 3870 | |
| }, | |
| { | |
| "epoch": 4.9088, | |
| "grad_norm": 0.2234901980490257, | |
| "learning_rate": 5.066952630711886e-08, | |
| "loss": 0.459, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13258133828639984, | |
| "step": 770, | |
| "valid_targets_mean": 5992.2, | |
| "valid_targets_min": 1472 | |
| }, | |
| { | |
| "epoch": 4.9408, | |
| "grad_norm": 0.22776223805560375, | |
| "learning_rate": 2.3954604228342283e-08, | |
| "loss": 0.4454, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10461519658565521, | |
| "step": 775, | |
| "valid_targets_mean": 5661.4, | |
| "valid_targets_min": 1507 | |
| }, | |
| { | |
| "epoch": 4.9728, | |
| "grad_norm": 0.2189412079803582, | |
| "learning_rate": 7.12798940197601e-09, | |
| "loss": 0.439, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11273640394210815, | |
| "step": 780, | |
| "valid_targets_mean": 5641.1, | |
| "valid_targets_min": 2808 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.381336408442084, | |
| "learning_rate": 1.9801114115480802e-10, | |
| "loss": 0.4311, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3657087981700897, | |
| "step": 785, | |
| "valid_targets_mean": 6505.4, | |
| "valid_targets_min": 1469 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3657087981700897, | |
| "step": 785, | |
| "total_flos": 2.0706726654792172e+18, | |
| "train_loss": 0.47480044941993277, | |
| "train_runtime": 19659.7391, | |
| "train_samples_per_second": 2.543, | |
| "train_steps_per_second": 0.04, | |
| "valid_targets_mean": 6505.4, | |
| "valid_targets_min": 1469 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 785, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.0706726654792172e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |