| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 6.0, | |
| "eval_steps": 500, | |
| "global_step": 2442, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.012285012285012284, | |
| "grad_norm": 19.340452072016063, | |
| "learning_rate": 6.530612244897961e-07, | |
| "loss": 1.0268, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 1.0363750457763672, | |
| "step": 5, | |
| "valid_targets_mean": 551.8, | |
| "valid_targets_min": 379 | |
| }, | |
| { | |
| "epoch": 0.02457002457002457, | |
| "grad_norm": 17.08893563458667, | |
| "learning_rate": 1.469387755102041e-06, | |
| "loss": 0.9836, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.9560065865516663, | |
| "step": 10, | |
| "valid_targets_mean": 630.4, | |
| "valid_targets_min": 357 | |
| }, | |
| { | |
| "epoch": 0.036855036855036855, | |
| "grad_norm": 15.515149146739999, | |
| "learning_rate": 2.285714285714286e-06, | |
| "loss": 0.9821, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.9447790384292603, | |
| "step": 15, | |
| "valid_targets_mean": 589.2, | |
| "valid_targets_min": 395 | |
| }, | |
| { | |
| "epoch": 0.04914004914004914, | |
| "grad_norm": 9.407548224971022, | |
| "learning_rate": 3.1020408163265307e-06, | |
| "loss": 0.8773, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.8385097980499268, | |
| "step": 20, | |
| "valid_targets_mean": 603.5, | |
| "valid_targets_min": 308 | |
| }, | |
| { | |
| "epoch": 0.06142506142506143, | |
| "grad_norm": 7.055702490270053, | |
| "learning_rate": 3.9183673469387755e-06, | |
| "loss": 0.82, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.7672586441040039, | |
| "step": 25, | |
| "valid_targets_mean": 571.4, | |
| "valid_targets_min": 341 | |
| }, | |
| { | |
| "epoch": 0.07371007371007371, | |
| "grad_norm": 5.105243917945062, | |
| "learning_rate": 4.734693877551021e-06, | |
| "loss": 0.6919, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6746348142623901, | |
| "step": 30, | |
| "valid_targets_mean": 494.1, | |
| "valid_targets_min": 350 | |
| }, | |
| { | |
| "epoch": 0.085995085995086, | |
| "grad_norm": 3.791005812926048, | |
| "learning_rate": 5.551020408163266e-06, | |
| "loss": 0.6329, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6144911050796509, | |
| "step": 35, | |
| "valid_targets_mean": 499.4, | |
| "valid_targets_min": 327 | |
| }, | |
| { | |
| "epoch": 0.09828009828009827, | |
| "grad_norm": 3.0694531628102637, | |
| "learning_rate": 6.36734693877551e-06, | |
| "loss": 0.5631, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5236115455627441, | |
| "step": 40, | |
| "valid_targets_mean": 531.1, | |
| "valid_targets_min": 303 | |
| }, | |
| { | |
| "epoch": 0.11056511056511056, | |
| "grad_norm": 2.3798079841537203, | |
| "learning_rate": 7.183673469387755e-06, | |
| "loss": 0.4885, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4935950040817261, | |
| "step": 45, | |
| "valid_targets_mean": 519.8, | |
| "valid_targets_min": 309 | |
| }, | |
| { | |
| "epoch": 0.12285012285012285, | |
| "grad_norm": 2.326009549639266, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 0.4598, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44546374678611755, | |
| "step": 50, | |
| "valid_targets_mean": 478.7, | |
| "valid_targets_min": 325 | |
| }, | |
| { | |
| "epoch": 0.13513513513513514, | |
| "grad_norm": 1.8861705823036319, | |
| "learning_rate": 8.816326530612247e-06, | |
| "loss": 0.4127, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4182875454425812, | |
| "step": 55, | |
| "valid_targets_mean": 551.9, | |
| "valid_targets_min": 314 | |
| }, | |
| { | |
| "epoch": 0.14742014742014742, | |
| "grad_norm": 1.8700358687363636, | |
| "learning_rate": 9.63265306122449e-06, | |
| "loss": 0.4035, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3552461266517639, | |
| "step": 60, | |
| "valid_targets_mean": 541.9, | |
| "valid_targets_min": 317 | |
| }, | |
| { | |
| "epoch": 0.1597051597051597, | |
| "grad_norm": 2.0142370613194434, | |
| "learning_rate": 1.0448979591836737e-05, | |
| "loss": 0.3729, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3656679391860962, | |
| "step": 65, | |
| "valid_targets_mean": 562.7, | |
| "valid_targets_min": 358 | |
| }, | |
| { | |
| "epoch": 0.171990171990172, | |
| "grad_norm": 1.7529734182420011, | |
| "learning_rate": 1.126530612244898e-05, | |
| "loss": 0.3651, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3600107431411743, | |
| "step": 70, | |
| "valid_targets_mean": 590.9, | |
| "valid_targets_min": 328 | |
| }, | |
| { | |
| "epoch": 0.18427518427518427, | |
| "grad_norm": 1.5209870168026196, | |
| "learning_rate": 1.2081632653061225e-05, | |
| "loss": 0.3538, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3366089463233948, | |
| "step": 75, | |
| "valid_targets_mean": 656.9, | |
| "valid_targets_min": 366 | |
| }, | |
| { | |
| "epoch": 0.19656019656019655, | |
| "grad_norm": 1.71123642654244, | |
| "learning_rate": 1.2897959183673469e-05, | |
| "loss": 0.3303, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35618680715560913, | |
| "step": 80, | |
| "valid_targets_mean": 606.8, | |
| "valid_targets_min": 370 | |
| }, | |
| { | |
| "epoch": 0.20884520884520885, | |
| "grad_norm": 1.4853648763338492, | |
| "learning_rate": 1.3714285714285716e-05, | |
| "loss": 0.3245, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3359481692314148, | |
| "step": 85, | |
| "valid_targets_mean": 622.8, | |
| "valid_targets_min": 380 | |
| }, | |
| { | |
| "epoch": 0.22113022113022113, | |
| "grad_norm": 1.361528089135814, | |
| "learning_rate": 1.4530612244897961e-05, | |
| "loss": 0.2952, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2936898469924927, | |
| "step": 90, | |
| "valid_targets_mean": 587.8, | |
| "valid_targets_min": 388 | |
| }, | |
| { | |
| "epoch": 0.2334152334152334, | |
| "grad_norm": 4.740219098514282, | |
| "learning_rate": 1.5346938775510204e-05, | |
| "loss": 0.3211, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3229856491088867, | |
| "step": 95, | |
| "valid_targets_mean": 641.1, | |
| "valid_targets_min": 319 | |
| }, | |
| { | |
| "epoch": 0.2457002457002457, | |
| "grad_norm": 1.5245883787059937, | |
| "learning_rate": 1.616326530612245e-05, | |
| "loss": 0.3009, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.280606210231781, | |
| "step": 100, | |
| "valid_targets_mean": 569.8, | |
| "valid_targets_min": 329 | |
| }, | |
| { | |
| "epoch": 0.257985257985258, | |
| "grad_norm": 1.5406725035413686, | |
| "learning_rate": 1.6979591836734695e-05, | |
| "loss": 0.2913, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3024246394634247, | |
| "step": 105, | |
| "valid_targets_mean": 608.4, | |
| "valid_targets_min": 326 | |
| }, | |
| { | |
| "epoch": 0.2702702702702703, | |
| "grad_norm": 1.4482316981295145, | |
| "learning_rate": 1.779591836734694e-05, | |
| "loss": 0.2983, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3170296549797058, | |
| "step": 110, | |
| "valid_targets_mean": 627.4, | |
| "valid_targets_min": 353 | |
| }, | |
| { | |
| "epoch": 0.28255528255528256, | |
| "grad_norm": 1.4660214762255919, | |
| "learning_rate": 1.8612244897959185e-05, | |
| "loss": 0.2739, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26000386476516724, | |
| "step": 115, | |
| "valid_targets_mean": 612.3, | |
| "valid_targets_min": 389 | |
| }, | |
| { | |
| "epoch": 0.29484029484029484, | |
| "grad_norm": 1.6409256874657099, | |
| "learning_rate": 1.942857142857143e-05, | |
| "loss": 0.2961, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3217686414718628, | |
| "step": 120, | |
| "valid_targets_mean": 561.9, | |
| "valid_targets_min": 327 | |
| }, | |
| { | |
| "epoch": 0.3071253071253071, | |
| "grad_norm": 1.4065164837723425, | |
| "learning_rate": 2.0244897959183672e-05, | |
| "loss": 0.2921, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2969103753566742, | |
| "step": 125, | |
| "valid_targets_mean": 589.6, | |
| "valid_targets_min": 310 | |
| }, | |
| { | |
| "epoch": 0.3194103194103194, | |
| "grad_norm": 1.5813750575415058, | |
| "learning_rate": 2.106122448979592e-05, | |
| "loss": 0.2818, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2890018820762634, | |
| "step": 130, | |
| "valid_targets_mean": 586.1, | |
| "valid_targets_min": 300 | |
| }, | |
| { | |
| "epoch": 0.3316953316953317, | |
| "grad_norm": 1.5224653877090757, | |
| "learning_rate": 2.1877551020408166e-05, | |
| "loss": 0.2917, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2904941439628601, | |
| "step": 135, | |
| "valid_targets_mean": 576.4, | |
| "valid_targets_min": 354 | |
| }, | |
| { | |
| "epoch": 0.343980343980344, | |
| "grad_norm": 1.3452861170507189, | |
| "learning_rate": 2.269387755102041e-05, | |
| "loss": 0.2826, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31476208567619324, | |
| "step": 140, | |
| "valid_targets_mean": 692.4, | |
| "valid_targets_min": 352 | |
| }, | |
| { | |
| "epoch": 0.35626535626535627, | |
| "grad_norm": 1.5523700640116074, | |
| "learning_rate": 2.3510204081632656e-05, | |
| "loss": 0.2606, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23628346621990204, | |
| "step": 145, | |
| "valid_targets_mean": 509.4, | |
| "valid_targets_min": 329 | |
| }, | |
| { | |
| "epoch": 0.36855036855036855, | |
| "grad_norm": 1.4055510862867506, | |
| "learning_rate": 2.4326530612244898e-05, | |
| "loss": 0.2773, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28314584493637085, | |
| "step": 150, | |
| "valid_targets_mean": 613.8, | |
| "valid_targets_min": 387 | |
| }, | |
| { | |
| "epoch": 0.3808353808353808, | |
| "grad_norm": 1.5047918312671191, | |
| "learning_rate": 2.5142857142857143e-05, | |
| "loss": 0.2662, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2703036665916443, | |
| "step": 155, | |
| "valid_targets_mean": 611.1, | |
| "valid_targets_min": 373 | |
| }, | |
| { | |
| "epoch": 0.3931203931203931, | |
| "grad_norm": 1.6190760418995298, | |
| "learning_rate": 2.5959183673469392e-05, | |
| "loss": 0.2813, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2736574411392212, | |
| "step": 160, | |
| "valid_targets_mean": 575.7, | |
| "valid_targets_min": 380 | |
| }, | |
| { | |
| "epoch": 0.40540540540540543, | |
| "grad_norm": 1.5123475030605578, | |
| "learning_rate": 2.6775510204081637e-05, | |
| "loss": 0.2607, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2557530105113983, | |
| "step": 165, | |
| "valid_targets_mean": 565.6, | |
| "valid_targets_min": 344 | |
| }, | |
| { | |
| "epoch": 0.4176904176904177, | |
| "grad_norm": 1.619694103526443, | |
| "learning_rate": 2.7591836734693882e-05, | |
| "loss": 0.2724, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27611520886421204, | |
| "step": 170, | |
| "valid_targets_mean": 541.1, | |
| "valid_targets_min": 342 | |
| }, | |
| { | |
| "epoch": 0.42997542997543, | |
| "grad_norm": 1.4826756269933499, | |
| "learning_rate": 2.8408163265306124e-05, | |
| "loss": 0.2629, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2644728720188141, | |
| "step": 175, | |
| "valid_targets_mean": 522.6, | |
| "valid_targets_min": 364 | |
| }, | |
| { | |
| "epoch": 0.44226044226044225, | |
| "grad_norm": 1.5670463191851074, | |
| "learning_rate": 2.922448979591837e-05, | |
| "loss": 0.2757, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27483248710632324, | |
| "step": 180, | |
| "valid_targets_mean": 602.3, | |
| "valid_targets_min": 327 | |
| }, | |
| { | |
| "epoch": 0.45454545454545453, | |
| "grad_norm": 1.408617532690139, | |
| "learning_rate": 3.0040816326530614e-05, | |
| "loss": 0.2585, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24957937002182007, | |
| "step": 185, | |
| "valid_targets_mean": 599.8, | |
| "valid_targets_min": 399 | |
| }, | |
| { | |
| "epoch": 0.4668304668304668, | |
| "grad_norm": 1.4355940504839466, | |
| "learning_rate": 3.085714285714286e-05, | |
| "loss": 0.2652, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2865978181362152, | |
| "step": 190, | |
| "valid_targets_mean": 613.9, | |
| "valid_targets_min": 364 | |
| }, | |
| { | |
| "epoch": 0.47911547911547914, | |
| "grad_norm": 1.3616233501948924, | |
| "learning_rate": 3.1673469387755105e-05, | |
| "loss": 0.2643, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24286630749702454, | |
| "step": 195, | |
| "valid_targets_mean": 604.1, | |
| "valid_targets_min": 362 | |
| }, | |
| { | |
| "epoch": 0.4914004914004914, | |
| "grad_norm": 1.3541147315022435, | |
| "learning_rate": 3.2489795918367346e-05, | |
| "loss": 0.2535, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22083410620689392, | |
| "step": 200, | |
| "valid_targets_mean": 519.1, | |
| "valid_targets_min": 318 | |
| }, | |
| { | |
| "epoch": 0.5036855036855037, | |
| "grad_norm": 1.5699583522473184, | |
| "learning_rate": 3.3306122448979595e-05, | |
| "loss": 0.2663, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2536630928516388, | |
| "step": 205, | |
| "valid_targets_mean": 503.4, | |
| "valid_targets_min": 291 | |
| }, | |
| { | |
| "epoch": 0.515970515970516, | |
| "grad_norm": 1.4524492019396484, | |
| "learning_rate": 3.4122448979591843e-05, | |
| "loss": 0.2574, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2488933801651001, | |
| "step": 210, | |
| "valid_targets_mean": 568.8, | |
| "valid_targets_min": 358 | |
| }, | |
| { | |
| "epoch": 0.5282555282555282, | |
| "grad_norm": 1.455074638920383, | |
| "learning_rate": 3.4938775510204085e-05, | |
| "loss": 0.253, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2419266402721405, | |
| "step": 215, | |
| "valid_targets_mean": 550.0, | |
| "valid_targets_min": 332 | |
| }, | |
| { | |
| "epoch": 0.5405405405405406, | |
| "grad_norm": 1.4702983842648538, | |
| "learning_rate": 3.575510204081633e-05, | |
| "loss": 0.2443, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24988505244255066, | |
| "step": 220, | |
| "valid_targets_mean": 522.4, | |
| "valid_targets_min": 314 | |
| }, | |
| { | |
| "epoch": 0.5528255528255528, | |
| "grad_norm": 1.2678095680289083, | |
| "learning_rate": 3.6571428571428576e-05, | |
| "loss": 0.2535, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23922167718410492, | |
| "step": 225, | |
| "valid_targets_mean": 618.5, | |
| "valid_targets_min": 391 | |
| }, | |
| { | |
| "epoch": 0.5651105651105651, | |
| "grad_norm": 1.3124904196473028, | |
| "learning_rate": 3.738775510204082e-05, | |
| "loss": 0.2513, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2686520218849182, | |
| "step": 230, | |
| "valid_targets_mean": 610.7, | |
| "valid_targets_min": 348 | |
| }, | |
| { | |
| "epoch": 0.5773955773955773, | |
| "grad_norm": 1.2321365465764669, | |
| "learning_rate": 3.8204081632653066e-05, | |
| "loss": 0.2429, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22676542401313782, | |
| "step": 235, | |
| "valid_targets_mean": 529.8, | |
| "valid_targets_min": 340 | |
| }, | |
| { | |
| "epoch": 0.5896805896805897, | |
| "grad_norm": 1.4304495261227945, | |
| "learning_rate": 3.902040816326531e-05, | |
| "loss": 0.2402, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22461196780204773, | |
| "step": 240, | |
| "valid_targets_mean": 525.8, | |
| "valid_targets_min": 351 | |
| }, | |
| { | |
| "epoch": 0.601965601965602, | |
| "grad_norm": 1.2501593563439926, | |
| "learning_rate": 3.983673469387755e-05, | |
| "loss": 0.2553, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2536337077617645, | |
| "step": 245, | |
| "valid_targets_mean": 624.1, | |
| "valid_targets_min": 341 | |
| }, | |
| { | |
| "epoch": 0.6142506142506142, | |
| "grad_norm": 1.365723699724294, | |
| "learning_rate": 3.9999672841332876e-05, | |
| "loss": 0.2569, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24196216464042664, | |
| "step": 250, | |
| "valid_targets_mean": 572.6, | |
| "valid_targets_min": 377 | |
| }, | |
| { | |
| "epoch": 0.6265356265356266, | |
| "grad_norm": 1.4290316134985837, | |
| "learning_rate": 3.999834377759164e-05, | |
| "loss": 0.2583, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2442513257265091, | |
| "step": 255, | |
| "valid_targets_mean": 570.1, | |
| "valid_targets_min": 392 | |
| }, | |
| { | |
| "epoch": 0.6388206388206388, | |
| "grad_norm": 1.279025523831711, | |
| "learning_rate": 3.999599242924703e-05, | |
| "loss": 0.2602, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2676926255226135, | |
| "step": 260, | |
| "valid_targets_mean": 616.0, | |
| "valid_targets_min": 359 | |
| }, | |
| { | |
| "epoch": 0.6511056511056511, | |
| "grad_norm": 1.3742183041274103, | |
| "learning_rate": 3.999261891649637e-05, | |
| "loss": 0.2446, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23325861990451813, | |
| "step": 265, | |
| "valid_targets_mean": 535.8, | |
| "valid_targets_min": 362 | |
| }, | |
| { | |
| "epoch": 0.6633906633906634, | |
| "grad_norm": 1.3319257895644228, | |
| "learning_rate": 3.9988223411788436e-05, | |
| "loss": 0.2553, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23690742254257202, | |
| "step": 270, | |
| "valid_targets_mean": 529.6, | |
| "valid_targets_min": 336 | |
| }, | |
| { | |
| "epoch": 0.6756756756756757, | |
| "grad_norm": 1.2333251207411267, | |
| "learning_rate": 3.998280613981468e-05, | |
| "loss": 0.2441, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2443428933620453, | |
| "step": 275, | |
| "valid_targets_mean": 592.6, | |
| "valid_targets_min": 381 | |
| }, | |
| { | |
| "epoch": 0.687960687960688, | |
| "grad_norm": 1.2653219638572915, | |
| "learning_rate": 3.9976367377497725e-05, | |
| "loss": 0.2533, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2635766565799713, | |
| "step": 280, | |
| "valid_targets_mean": 661.1, | |
| "valid_targets_min": 333 | |
| }, | |
| { | |
| "epoch": 0.7002457002457002, | |
| "grad_norm": 1.2132987899715728, | |
| "learning_rate": 3.99689074539772e-05, | |
| "loss": 0.2428, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23148810863494873, | |
| "step": 285, | |
| "valid_targets_mean": 590.9, | |
| "valid_targets_min": 342 | |
| }, | |
| { | |
| "epoch": 0.7125307125307125, | |
| "grad_norm": 1.3232052555393115, | |
| "learning_rate": 3.9960426750592936e-05, | |
| "loss": 0.2366, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2554059624671936, | |
| "step": 290, | |
| "valid_targets_mean": 586.4, | |
| "valid_targets_min": 391 | |
| }, | |
| { | |
| "epoch": 0.7248157248157249, | |
| "grad_norm": 1.3250013840237953, | |
| "learning_rate": 3.995092570086546e-05, | |
| "loss": 0.2372, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23205137252807617, | |
| "step": 295, | |
| "valid_targets_mean": 534.9, | |
| "valid_targets_min": 379 | |
| }, | |
| { | |
| "epoch": 0.7371007371007371, | |
| "grad_norm": 1.174536495094598, | |
| "learning_rate": 3.9940404790473825e-05, | |
| "loss": 0.2362, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23327840864658356, | |
| "step": 300, | |
| "valid_targets_mean": 577.4, | |
| "valid_targets_min": 338 | |
| }, | |
| { | |
| "epoch": 0.7493857493857494, | |
| "grad_norm": 1.2736986293286983, | |
| "learning_rate": 3.992886455723082e-05, | |
| "loss": 0.2423, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23847690224647522, | |
| "step": 305, | |
| "valid_targets_mean": 604.8, | |
| "valid_targets_min": 356 | |
| }, | |
| { | |
| "epoch": 0.7616707616707616, | |
| "grad_norm": 1.202738294769371, | |
| "learning_rate": 3.991630559105541e-05, | |
| "loss": 0.2429, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25206199288368225, | |
| "step": 310, | |
| "valid_targets_mean": 595.6, | |
| "valid_targets_min": 336 | |
| }, | |
| { | |
| "epoch": 0.773955773955774, | |
| "grad_norm": 1.1705643307443048, | |
| "learning_rate": 3.990272853394268e-05, | |
| "loss": 0.2401, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2267996072769165, | |
| "step": 315, | |
| "valid_targets_mean": 609.8, | |
| "valid_targets_min": 350 | |
| }, | |
| { | |
| "epoch": 0.7862407862407862, | |
| "grad_norm": 1.2741625837837056, | |
| "learning_rate": 3.988813407993089e-05, | |
| "loss": 0.2272, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22659529745578766, | |
| "step": 320, | |
| "valid_targets_mean": 520.9, | |
| "valid_targets_min": 339 | |
| }, | |
| { | |
| "epoch": 0.7985257985257985, | |
| "grad_norm": 1.2095767910140882, | |
| "learning_rate": 3.987252297506613e-05, | |
| "loss": 0.2387, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23505187034606934, | |
| "step": 325, | |
| "valid_targets_mean": 557.3, | |
| "valid_targets_min": 351 | |
| }, | |
| { | |
| "epoch": 0.8108108108108109, | |
| "grad_norm": 1.2469501504557738, | |
| "learning_rate": 3.9855896017364075e-05, | |
| "loss": 0.2361, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2419613003730774, | |
| "step": 330, | |
| "valid_targets_mean": 522.1, | |
| "valid_targets_min": 333 | |
| }, | |
| { | |
| "epoch": 0.8230958230958231, | |
| "grad_norm": 1.3525431983270617, | |
| "learning_rate": 3.983825405676927e-05, | |
| "loss": 0.2425, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23498916625976562, | |
| "step": 335, | |
| "valid_targets_mean": 526.7, | |
| "valid_targets_min": 364 | |
| }, | |
| { | |
| "epoch": 0.8353808353808354, | |
| "grad_norm": 1.266098276438524, | |
| "learning_rate": 3.981959799511161e-05, | |
| "loss": 0.2361, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22201555967330933, | |
| "step": 340, | |
| "valid_targets_mean": 516.1, | |
| "valid_targets_min": 287 | |
| }, | |
| { | |
| "epoch": 0.8476658476658476, | |
| "grad_norm": 1.1950372008453742, | |
| "learning_rate": 3.979992878606032e-05, | |
| "loss": 0.2391, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23893003165721893, | |
| "step": 345, | |
| "valid_targets_mean": 604.9, | |
| "valid_targets_min": 358 | |
| }, | |
| { | |
| "epoch": 0.85995085995086, | |
| "grad_norm": 1.1942813421589011, | |
| "learning_rate": 3.977924743507513e-05, | |
| "loss": 0.2423, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2164364457130432, | |
| "step": 350, | |
| "valid_targets_mean": 568.4, | |
| "valid_targets_min": 386 | |
| }, | |
| { | |
| "epoch": 0.8722358722358723, | |
| "grad_norm": 1.1521319253994895, | |
| "learning_rate": 3.975755499935492e-05, | |
| "loss": 0.2541, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2973342835903168, | |
| "step": 355, | |
| "valid_targets_mean": 677.1, | |
| "valid_targets_min": 359 | |
| }, | |
| { | |
| "epoch": 0.8845208845208845, | |
| "grad_norm": 1.2819519264235832, | |
| "learning_rate": 3.973485258778368e-05, | |
| "loss": 0.2315, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2431134581565857, | |
| "step": 360, | |
| "valid_targets_mean": 520.0, | |
| "valid_targets_min": 320 | |
| }, | |
| { | |
| "epoch": 0.8968058968058968, | |
| "grad_norm": 1.121053883863795, | |
| "learning_rate": 3.971114136087379e-05, | |
| "loss": 0.242, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2276688814163208, | |
| "step": 365, | |
| "valid_targets_mean": 623.6, | |
| "valid_targets_min": 314 | |
| }, | |
| { | |
| "epoch": 0.9090909090909091, | |
| "grad_norm": 1.2335240034008033, | |
| "learning_rate": 3.968642253070675e-05, | |
| "loss": 0.2506, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24868804216384888, | |
| "step": 370, | |
| "valid_targets_mean": 486.9, | |
| "valid_targets_min": 349 | |
| }, | |
| { | |
| "epoch": 0.9213759213759214, | |
| "grad_norm": 1.084721824161106, | |
| "learning_rate": 3.966069736087116e-05, | |
| "loss": 0.2419, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21974554657936096, | |
| "step": 375, | |
| "valid_targets_mean": 572.9, | |
| "valid_targets_min": 388 | |
| }, | |
| { | |
| "epoch": 0.9336609336609336, | |
| "grad_norm": 1.5415223968516378, | |
| "learning_rate": 3.963396716639818e-05, | |
| "loss": 0.23, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22481045126914978, | |
| "step": 380, | |
| "valid_targets_mean": 513.1, | |
| "valid_targets_min": 335 | |
| }, | |
| { | |
| "epoch": 0.9459459459459459, | |
| "grad_norm": 1.184005524489551, | |
| "learning_rate": 3.960623331369427e-05, | |
| "loss": 0.229, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20717152953147888, | |
| "step": 385, | |
| "valid_targets_mean": 569.8, | |
| "valid_targets_min": 378 | |
| }, | |
| { | |
| "epoch": 0.9582309582309583, | |
| "grad_norm": 1.1736401735916975, | |
| "learning_rate": 3.957749722047138e-05, | |
| "loss": 0.2246, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24852100014686584, | |
| "step": 390, | |
| "valid_targets_mean": 562.8, | |
| "valid_targets_min": 340 | |
| }, | |
| { | |
| "epoch": 0.9705159705159705, | |
| "grad_norm": 1.2154851241242357, | |
| "learning_rate": 3.9547760355674405e-05, | |
| "loss": 0.2306, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2460574209690094, | |
| "step": 395, | |
| "valid_targets_mean": 592.2, | |
| "valid_targets_min": 315 | |
| }, | |
| { | |
| "epoch": 0.9828009828009828, | |
| "grad_norm": 1.1414024083345906, | |
| "learning_rate": 3.951702423940621e-05, | |
| "loss": 0.2432, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.273346483707428, | |
| "step": 400, | |
| "valid_targets_mean": 704.4, | |
| "valid_targets_min": 372 | |
| }, | |
| { | |
| "epoch": 0.995085995085995, | |
| "grad_norm": 1.2888653065517275, | |
| "learning_rate": 3.948529044284981e-05, | |
| "loss": 0.2285, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2338770031929016, | |
| "step": 405, | |
| "valid_targets_mean": 528.1, | |
| "valid_targets_min": 383 | |
| }, | |
| { | |
| "epoch": 1.0073710073710074, | |
| "grad_norm": 1.273222474953753, | |
| "learning_rate": 3.9452560588188135e-05, | |
| "loss": 0.2126, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20872265100479126, | |
| "step": 410, | |
| "valid_targets_mean": 483.4, | |
| "valid_targets_min": 345 | |
| }, | |
| { | |
| "epoch": 1.0196560196560196, | |
| "grad_norm": 1.1230278716937114, | |
| "learning_rate": 3.9418836348521045e-05, | |
| "loss": 0.2122, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1878654807806015, | |
| "step": 415, | |
| "valid_targets_mean": 560.2, | |
| "valid_targets_min": 350 | |
| }, | |
| { | |
| "epoch": 1.031941031941032, | |
| "grad_norm": 1.3385969964323565, | |
| "learning_rate": 3.9384119447779854e-05, | |
| "loss": 0.2053, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2124098837375641, | |
| "step": 420, | |
| "valid_targets_mean": 531.4, | |
| "valid_targets_min": 346 | |
| }, | |
| { | |
| "epoch": 1.0442260442260443, | |
| "grad_norm": 1.133701044120653, | |
| "learning_rate": 3.934841166063919e-05, | |
| "loss": 0.2151, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21468473970890045, | |
| "step": 425, | |
| "valid_targets_mean": 593.8, | |
| "valid_targets_min": 337 | |
| }, | |
| { | |
| "epoch": 1.0565110565110565, | |
| "grad_norm": 1.1884236103320405, | |
| "learning_rate": 3.931171481242625e-05, | |
| "loss": 0.1999, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20533575117588043, | |
| "step": 430, | |
| "valid_targets_mean": 586.6, | |
| "valid_targets_min": 364 | |
| }, | |
| { | |
| "epoch": 1.0687960687960687, | |
| "grad_norm": 1.2358471031407317, | |
| "learning_rate": 3.927403077902753e-05, | |
| "loss": 0.2029, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20509561896324158, | |
| "step": 435, | |
| "valid_targets_mean": 554.1, | |
| "valid_targets_min": 303 | |
| }, | |
| { | |
| "epoch": 1.0810810810810811, | |
| "grad_norm": 1.1162627713390312, | |
| "learning_rate": 3.9235361486792905e-05, | |
| "loss": 0.2007, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2194371223449707, | |
| "step": 440, | |
| "valid_targets_mean": 592.0, | |
| "valid_targets_min": 391 | |
| }, | |
| { | |
| "epoch": 1.0933660933660934, | |
| "grad_norm": 1.1664113838151917, | |
| "learning_rate": 3.9195708912437176e-05, | |
| "loss": 0.2222, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19551971554756165, | |
| "step": 445, | |
| "valid_targets_mean": 562.9, | |
| "valid_targets_min": 299 | |
| }, | |
| { | |
| "epoch": 1.1056511056511056, | |
| "grad_norm": 1.1148563823576316, | |
| "learning_rate": 3.915507508293901e-05, | |
| "loss": 0.2028, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.224244624376297, | |
| "step": 450, | |
| "valid_targets_mean": 601.6, | |
| "valid_targets_min": 341 | |
| }, | |
| { | |
| "epoch": 1.117936117936118, | |
| "grad_norm": 1.209728679709457, | |
| "learning_rate": 3.911346207543734e-05, | |
| "loss": 0.1981, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21454249322414398, | |
| "step": 455, | |
| "valid_targets_mean": 560.9, | |
| "valid_targets_min": 333 | |
| }, | |
| { | |
| "epoch": 1.1302211302211302, | |
| "grad_norm": 1.0703696823302942, | |
| "learning_rate": 3.907087201712515e-05, | |
| "loss": 0.2154, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22700481116771698, | |
| "step": 460, | |
| "valid_targets_mean": 657.5, | |
| "valid_targets_min": 321 | |
| }, | |
| { | |
| "epoch": 1.1425061425061425, | |
| "grad_norm": 1.087498535135239, | |
| "learning_rate": 3.902730708514078e-05, | |
| "loss": 0.2037, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19121837615966797, | |
| "step": 465, | |
| "valid_targets_mean": 605.1, | |
| "valid_targets_min": 414 | |
| }, | |
| { | |
| "epoch": 1.154791154791155, | |
| "grad_norm": 1.1754498968382059, | |
| "learning_rate": 3.8982769506456616e-05, | |
| "loss": 0.1997, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2021336704492569, | |
| "step": 470, | |
| "valid_targets_mean": 610.5, | |
| "valid_targets_min": 390 | |
| }, | |
| { | |
| "epoch": 1.1670761670761671, | |
| "grad_norm": 1.0324644891352066, | |
| "learning_rate": 3.893726155776524e-05, | |
| "loss": 0.2022, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16948124766349792, | |
| "step": 475, | |
| "valid_targets_mean": 563.5, | |
| "valid_targets_min": 355 | |
| }, | |
| { | |
| "epoch": 1.1793611793611793, | |
| "grad_norm": 1.1444059489981688, | |
| "learning_rate": 3.8890785565363046e-05, | |
| "loss": 0.2131, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22946152091026306, | |
| "step": 480, | |
| "valid_targets_mean": 585.4, | |
| "valid_targets_min": 333 | |
| }, | |
| { | |
| "epoch": 1.1916461916461916, | |
| "grad_norm": 1.0980010911655984, | |
| "learning_rate": 3.884334390503136e-05, | |
| "loss": 0.2136, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20137803256511688, | |
| "step": 485, | |
| "valid_targets_mean": 556.1, | |
| "valid_targets_min": 352 | |
| }, | |
| { | |
| "epoch": 1.203931203931204, | |
| "grad_norm": 1.2187757226237967, | |
| "learning_rate": 3.8794939001914955e-05, | |
| "loss": 0.209, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2199264019727707, | |
| "step": 490, | |
| "valid_targets_mean": 605.1, | |
| "valid_targets_min": 350 | |
| }, | |
| { | |
| "epoch": 1.2162162162162162, | |
| "grad_norm": 1.291377657263299, | |
| "learning_rate": 3.87455733303981e-05, | |
| "loss": 0.2161, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22223210334777832, | |
| "step": 495, | |
| "valid_targets_mean": 562.2, | |
| "valid_targets_min": 316 | |
| }, | |
| { | |
| "epoch": 1.2285012285012284, | |
| "grad_norm": 1.087891213866882, | |
| "learning_rate": 3.869524941397805e-05, | |
| "loss": 0.2058, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22227486968040466, | |
| "step": 500, | |
| "valid_targets_mean": 561.1, | |
| "valid_targets_min": 374 | |
| }, | |
| { | |
| "epoch": 1.2407862407862407, | |
| "grad_norm": 1.0875951457065944, | |
| "learning_rate": 3.8643969825136095e-05, | |
| "loss": 0.209, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21871165931224823, | |
| "step": 505, | |
| "valid_targets_mean": 605.8, | |
| "valid_targets_min": 291 | |
| }, | |
| { | |
| "epoch": 1.253071253071253, | |
| "grad_norm": 1.1345287983708072, | |
| "learning_rate": 3.8591737185206024e-05, | |
| "loss": 0.2134, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19657248258590698, | |
| "step": 510, | |
| "valid_targets_mean": 517.6, | |
| "valid_targets_min": 365 | |
| }, | |
| { | |
| "epoch": 1.2653562653562653, | |
| "grad_norm": 1.1578099843167293, | |
| "learning_rate": 3.853855416424011e-05, | |
| "loss": 0.2046, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22923174500465393, | |
| "step": 515, | |
| "valid_targets_mean": 629.9, | |
| "valid_targets_min": 351 | |
| }, | |
| { | |
| "epoch": 1.2776412776412776, | |
| "grad_norm": 1.3467752290200405, | |
| "learning_rate": 3.848442348087267e-05, | |
| "loss": 0.2129, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22567245364189148, | |
| "step": 520, | |
| "valid_targets_mean": 493.1, | |
| "valid_targets_min": 348 | |
| }, | |
| { | |
| "epoch": 1.28992628992629, | |
| "grad_norm": 1.0355038419138378, | |
| "learning_rate": 3.842934790218106e-05, | |
| "loss": 0.2027, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1945795863866806, | |
| "step": 525, | |
| "valid_targets_mean": 609.4, | |
| "valid_targets_min": 355 | |
| }, | |
| { | |
| "epoch": 1.3022113022113022, | |
| "grad_norm": 1.098408635608927, | |
| "learning_rate": 3.837333024354422e-05, | |
| "loss": 0.2111, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21321016550064087, | |
| "step": 530, | |
| "valid_targets_mean": 629.6, | |
| "valid_targets_min": 334 | |
| }, | |
| { | |
| "epoch": 1.3144963144963144, | |
| "grad_norm": 1.1965501456460534, | |
| "learning_rate": 3.8316373368498794e-05, | |
| "loss": 0.197, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20622071623802185, | |
| "step": 535, | |
| "valid_targets_mean": 498.1, | |
| "valid_targets_min": 329 | |
| }, | |
| { | |
| "epoch": 1.3267813267813269, | |
| "grad_norm": 1.2780117337853627, | |
| "learning_rate": 3.82584801885927e-05, | |
| "loss": 0.2112, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21127185225486755, | |
| "step": 540, | |
| "valid_targets_mean": 512.1, | |
| "valid_targets_min": 272 | |
| }, | |
| { | |
| "epoch": 1.339066339066339, | |
| "grad_norm": 1.0208209030208835, | |
| "learning_rate": 3.8199653663236336e-05, | |
| "loss": 0.2215, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23393374681472778, | |
| "step": 545, | |
| "valid_targets_mean": 637.0, | |
| "valid_targets_min": 389 | |
| }, | |
| { | |
| "epoch": 1.3513513513513513, | |
| "grad_norm": 1.0756398207865907, | |
| "learning_rate": 3.813989679955128e-05, | |
| "loss": 0.2102, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21633684635162354, | |
| "step": 550, | |
| "valid_targets_mean": 600.6, | |
| "valid_targets_min": 332 | |
| }, | |
| { | |
| "epoch": 1.3636363636363638, | |
| "grad_norm": 1.2709984419070997, | |
| "learning_rate": 3.8079212652216595e-05, | |
| "loss": 0.1973, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19809314608573914, | |
| "step": 555, | |
| "valid_targets_mean": 507.6, | |
| "valid_targets_min": 391 | |
| }, | |
| { | |
| "epoch": 1.375921375921376, | |
| "grad_norm": 1.064418446515292, | |
| "learning_rate": 3.8017604323312616e-05, | |
| "loss": 0.2009, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2035004198551178, | |
| "step": 560, | |
| "valid_targets_mean": 572.9, | |
| "valid_targets_min": 366 | |
| }, | |
| { | |
| "epoch": 1.3882063882063882, | |
| "grad_norm": 1.1078788278566427, | |
| "learning_rate": 3.795507496216246e-05, | |
| "loss": 0.2136, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24907588958740234, | |
| "step": 565, | |
| "valid_targets_mean": 608.2, | |
| "valid_targets_min": 386 | |
| }, | |
| { | |
| "epoch": 1.4004914004914004, | |
| "grad_norm": 1.0256660334768586, | |
| "learning_rate": 3.789162776517098e-05, | |
| "loss": 0.2026, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2145499885082245, | |
| "step": 570, | |
| "valid_targets_mean": 657.9, | |
| "valid_targets_min": 373 | |
| }, | |
| { | |
| "epoch": 1.4127764127764126, | |
| "grad_norm": 1.034031043079854, | |
| "learning_rate": 3.78272659756614e-05, | |
| "loss": 0.2183, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2272603064775467, | |
| "step": 575, | |
| "valid_targets_mean": 645.5, | |
| "valid_targets_min": 340 | |
| }, | |
| { | |
| "epoch": 1.425061425061425, | |
| "grad_norm": 1.2896668422957693, | |
| "learning_rate": 3.776199288370948e-05, | |
| "loss": 0.2177, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23518535494804382, | |
| "step": 580, | |
| "valid_targets_mean": 468.1, | |
| "valid_targets_min": 319 | |
| }, | |
| { | |
| "epoch": 1.4373464373464373, | |
| "grad_norm": 0.8865050790970145, | |
| "learning_rate": 3.7695811825975386e-05, | |
| "loss": 0.2022, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1835131049156189, | |
| "step": 585, | |
| "valid_targets_mean": 766.4, | |
| "valid_targets_min": 556 | |
| }, | |
| { | |
| "epoch": 1.4496314496314495, | |
| "grad_norm": 1.0052694619139135, | |
| "learning_rate": 3.76287261855331e-05, | |
| "loss": 0.1979, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21307772397994995, | |
| "step": 590, | |
| "valid_targets_mean": 663.4, | |
| "valid_targets_min": 324 | |
| }, | |
| { | |
| "epoch": 1.461916461916462, | |
| "grad_norm": 0.968909885622136, | |
| "learning_rate": 3.7560739391697465e-05, | |
| "loss": 0.2058, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1846340447664261, | |
| "step": 595, | |
| "valid_targets_mean": 574.1, | |
| "valid_targets_min": 357 | |
| }, | |
| { | |
| "epoch": 1.4742014742014742, | |
| "grad_norm": 1.008739229853549, | |
| "learning_rate": 3.749185491984891e-05, | |
| "loss": 0.2011, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1903940886259079, | |
| "step": 600, | |
| "valid_targets_mean": 651.4, | |
| "valid_targets_min": 349 | |
| }, | |
| { | |
| "epoch": 1.4864864864864864, | |
| "grad_norm": 1.0894449533360777, | |
| "learning_rate": 3.7422076291255785e-05, | |
| "loss": 0.1951, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19377467036247253, | |
| "step": 605, | |
| "valid_targets_mean": 501.9, | |
| "valid_targets_min": 360 | |
| }, | |
| { | |
| "epoch": 1.4987714987714988, | |
| "grad_norm": 1.1134116216367664, | |
| "learning_rate": 3.7351407072894356e-05, | |
| "loss": 0.2005, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19366130232810974, | |
| "step": 610, | |
| "valid_targets_mean": 540.0, | |
| "valid_targets_min": 323 | |
| }, | |
| { | |
| "epoch": 1.511056511056511, | |
| "grad_norm": 1.1085285737625885, | |
| "learning_rate": 3.7279850877266486e-05, | |
| "loss": 0.2165, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23652777075767517, | |
| "step": 615, | |
| "valid_targets_mean": 552.9, | |
| "valid_targets_min": 330 | |
| }, | |
| { | |
| "epoch": 1.5233415233415233, | |
| "grad_norm": 1.1659336753490528, | |
| "learning_rate": 3.720741136221491e-05, | |
| "loss": 0.1904, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20393647253513336, | |
| "step": 620, | |
| "valid_targets_mean": 562.8, | |
| "valid_targets_min": 339 | |
| }, | |
| { | |
| "epoch": 1.5356265356265357, | |
| "grad_norm": 1.1256183805366937, | |
| "learning_rate": 3.713409223073636e-05, | |
| "loss": 0.2087, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21131408214569092, | |
| "step": 625, | |
| "valid_targets_mean": 596.2, | |
| "valid_targets_min": 361 | |
| }, | |
| { | |
| "epoch": 1.547911547911548, | |
| "grad_norm": 1.1025314376028277, | |
| "learning_rate": 3.705989723079214e-05, | |
| "loss": 0.1998, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2073504477739334, | |
| "step": 630, | |
| "valid_targets_mean": 534.4, | |
| "valid_targets_min": 347 | |
| }, | |
| { | |
| "epoch": 1.5601965601965602, | |
| "grad_norm": 1.2328995169055594, | |
| "learning_rate": 3.698483015511665e-05, | |
| "loss": 0.2005, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20556914806365967, | |
| "step": 635, | |
| "valid_targets_mean": 556.0, | |
| "valid_targets_min": 393 | |
| }, | |
| { | |
| "epoch": 1.5724815724815726, | |
| "grad_norm": 1.13051808020514, | |
| "learning_rate": 3.690889484102344e-05, | |
| "loss": 0.2034, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20527680218219757, | |
| "step": 640, | |
| "valid_targets_mean": 560.2, | |
| "valid_targets_min": 387 | |
| }, | |
| { | |
| "epoch": 1.5847665847665846, | |
| "grad_norm": 1.0356033195955525, | |
| "learning_rate": 3.683209517020908e-05, | |
| "loss": 0.213, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21655723452568054, | |
| "step": 645, | |
| "valid_targets_mean": 582.9, | |
| "valid_targets_min": 338 | |
| }, | |
| { | |
| "epoch": 1.597051597051597, | |
| "grad_norm": 1.1170971606578726, | |
| "learning_rate": 3.675443506855473e-05, | |
| "loss": 0.2036, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2006620168685913, | |
| "step": 650, | |
| "valid_targets_mean": 613.5, | |
| "valid_targets_min": 318 | |
| }, | |
| { | |
| "epoch": 1.6093366093366095, | |
| "grad_norm": 1.133535405808262, | |
| "learning_rate": 3.6675918505925456e-05, | |
| "loss": 0.201, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18652908504009247, | |
| "step": 655, | |
| "valid_targets_mean": 495.4, | |
| "valid_targets_min": 330 | |
| }, | |
| { | |
| "epoch": 1.6216216216216215, | |
| "grad_norm": 0.9925583371315405, | |
| "learning_rate": 3.6596549495967276e-05, | |
| "loss": 0.2068, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2260739654302597, | |
| "step": 660, | |
| "valid_targets_mean": 691.1, | |
| "valid_targets_min": 372 | |
| }, | |
| { | |
| "epoch": 1.633906633906634, | |
| "grad_norm": 1.145416405295079, | |
| "learning_rate": 3.651633209590202e-05, | |
| "loss": 0.1991, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1762043833732605, | |
| "step": 665, | |
| "valid_targets_mean": 534.9, | |
| "valid_targets_min": 313 | |
| }, | |
| { | |
| "epoch": 1.6461916461916462, | |
| "grad_norm": 1.1479183598008718, | |
| "learning_rate": 3.6435270406319914e-05, | |
| "loss": 0.2345, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22825786471366882, | |
| "step": 670, | |
| "valid_targets_mean": 511.7, | |
| "valid_targets_min": 382 | |
| }, | |
| { | |
| "epoch": 1.6584766584766584, | |
| "grad_norm": 1.0177952792861085, | |
| "learning_rate": 3.635336857096997e-05, | |
| "loss": 0.2002, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1958564817905426, | |
| "step": 675, | |
| "valid_targets_mean": 596.1, | |
| "valid_targets_min": 367 | |
| }, | |
| { | |
| "epoch": 1.6707616707616708, | |
| "grad_norm": 0.9497476755880511, | |
| "learning_rate": 3.627063077654815e-05, | |
| "loss": 0.1991, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18790635466575623, | |
| "step": 680, | |
| "valid_targets_mean": 577.7, | |
| "valid_targets_min": 364 | |
| }, | |
| { | |
| "epoch": 1.683046683046683, | |
| "grad_norm": 1.0364511769228686, | |
| "learning_rate": 3.618706125248337e-05, | |
| "loss": 0.2105, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20568892359733582, | |
| "step": 685, | |
| "valid_targets_mean": 610.6, | |
| "valid_targets_min": 395 | |
| }, | |
| { | |
| "epoch": 1.6953316953316953, | |
| "grad_norm": 1.0653626980123834, | |
| "learning_rate": 3.6102664270721275e-05, | |
| "loss": 0.2097, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2012636959552765, | |
| "step": 690, | |
| "valid_targets_mean": 584.4, | |
| "valid_targets_min": 380 | |
| }, | |
| { | |
| "epoch": 1.7076167076167077, | |
| "grad_norm": 1.0782173350105624, | |
| "learning_rate": 3.601744414550589e-05, | |
| "loss": 0.2029, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21700087189674377, | |
| "step": 695, | |
| "valid_targets_mean": 545.4, | |
| "valid_targets_min": 363 | |
| }, | |
| { | |
| "epoch": 1.71990171990172, | |
| "grad_norm": 1.0898960424875124, | |
| "learning_rate": 3.593140523315906e-05, | |
| "loss": 0.2099, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22257234156131744, | |
| "step": 700, | |
| "valid_targets_mean": 520.8, | |
| "valid_targets_min": 347 | |
| }, | |
| { | |
| "epoch": 1.7321867321867321, | |
| "grad_norm": 1.0486466431772246, | |
| "learning_rate": 3.584455193185778e-05, | |
| "loss": 0.202, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20295602083206177, | |
| "step": 705, | |
| "valid_targets_mean": 544.9, | |
| "valid_targets_min": 340 | |
| }, | |
| { | |
| "epoch": 1.7444717444717446, | |
| "grad_norm": 1.084800118329105, | |
| "learning_rate": 3.575688868140933e-05, | |
| "loss": 0.2032, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19122770428657532, | |
| "step": 710, | |
| "valid_targets_mean": 488.4, | |
| "valid_targets_min": 318 | |
| }, | |
| { | |
| "epoch": 1.7567567567567568, | |
| "grad_norm": 1.084292899345759, | |
| "learning_rate": 3.566841996302438e-05, | |
| "loss": 0.2051, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19632896780967712, | |
| "step": 715, | |
| "valid_targets_mean": 510.2, | |
| "valid_targets_min": 335 | |
| }, | |
| { | |
| "epoch": 1.769041769041769, | |
| "grad_norm": 1.0557684475273712, | |
| "learning_rate": 3.557915029908787e-05, | |
| "loss": 0.2064, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2040451020002365, | |
| "step": 720, | |
| "valid_targets_mean": 558.2, | |
| "valid_targets_min": 312 | |
| }, | |
| { | |
| "epoch": 1.7813267813267815, | |
| "grad_norm": 0.9261448145365117, | |
| "learning_rate": 3.548908425292784e-05, | |
| "loss": 0.206, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20299643278121948, | |
| "step": 725, | |
| "valid_targets_mean": 621.5, | |
| "valid_targets_min": 315 | |
| }, | |
| { | |
| "epoch": 1.7936117936117935, | |
| "grad_norm": 1.059967470462753, | |
| "learning_rate": 3.5398226428582165e-05, | |
| "loss": 0.2016, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1749897301197052, | |
| "step": 730, | |
| "valid_targets_mean": 587.3, | |
| "valid_targets_min": 327 | |
| }, | |
| { | |
| "epoch": 1.805896805896806, | |
| "grad_norm": 1.0927225436828998, | |
| "learning_rate": 3.530658147056321e-05, | |
| "loss": 0.2119, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21093472838401794, | |
| "step": 735, | |
| "valid_targets_mean": 536.0, | |
| "valid_targets_min": 367 | |
| }, | |
| { | |
| "epoch": 1.8181818181818183, | |
| "grad_norm": 1.0767012332996604, | |
| "learning_rate": 3.521415406362041e-05, | |
| "loss": 0.208, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2154802829027176, | |
| "step": 740, | |
| "valid_targets_mean": 587.4, | |
| "valid_targets_min": 358 | |
| }, | |
| { | |
| "epoch": 1.8304668304668303, | |
| "grad_norm": 1.0258650874116662, | |
| "learning_rate": 3.512094893250076e-05, | |
| "loss": 0.2038, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18948307633399963, | |
| "step": 745, | |
| "valid_targets_mean": 556.7, | |
| "valid_targets_min": 325 | |
| }, | |
| { | |
| "epoch": 1.8427518427518428, | |
| "grad_norm": 1.0917952930239936, | |
| "learning_rate": 3.5026970841707366e-05, | |
| "loss": 0.1969, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20295953750610352, | |
| "step": 750, | |
| "valid_targets_mean": 637.2, | |
| "valid_targets_min": 302 | |
| }, | |
| { | |
| "epoch": 1.855036855036855, | |
| "grad_norm": 1.0259051559636694, | |
| "learning_rate": 3.493222459525579e-05, | |
| "loss": 0.1963, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18988922238349915, | |
| "step": 755, | |
| "valid_targets_mean": 574.6, | |
| "valid_targets_min": 389 | |
| }, | |
| { | |
| "epoch": 1.8673218673218672, | |
| "grad_norm": 1.107801334038011, | |
| "learning_rate": 3.483671503642858e-05, | |
| "loss": 0.2029, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20162153244018555, | |
| "step": 760, | |
| "valid_targets_mean": 559.1, | |
| "valid_targets_min": 341 | |
| }, | |
| { | |
| "epoch": 1.8796068796068797, | |
| "grad_norm": 0.9513895961250298, | |
| "learning_rate": 3.474044704752761e-05, | |
| "loss": 0.2019, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20117336511611938, | |
| "step": 765, | |
| "valid_targets_mean": 614.8, | |
| "valid_targets_min": 344 | |
| }, | |
| { | |
| "epoch": 1.8918918918918919, | |
| "grad_norm": 1.087713893272352, | |
| "learning_rate": 3.464342554962454e-05, | |
| "loss": 0.2028, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21448519825935364, | |
| "step": 770, | |
| "valid_targets_mean": 522.4, | |
| "valid_targets_min": 330 | |
| }, | |
| { | |
| "epoch": 1.904176904176904, | |
| "grad_norm": 0.9467693162634699, | |
| "learning_rate": 3.4545655502309254e-05, | |
| "loss": 0.2004, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2018240988254547, | |
| "step": 775, | |
| "valid_targets_mean": 646.3, | |
| "valid_targets_min": 320 | |
| }, | |
| { | |
| "epoch": 1.9164619164619165, | |
| "grad_norm": 0.994395677212433, | |
| "learning_rate": 3.444714190343633e-05, | |
| "loss": 0.1997, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1869840919971466, | |
| "step": 780, | |
| "valid_targets_mean": 559.4, | |
| "valid_targets_min": 315 | |
| }, | |
| { | |
| "epoch": 1.9287469287469288, | |
| "grad_norm": 1.1350837336602309, | |
| "learning_rate": 3.434788978886957e-05, | |
| "loss": 0.2049, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19758516550064087, | |
| "step": 785, | |
| "valid_targets_mean": 514.1, | |
| "valid_targets_min": 360 | |
| }, | |
| { | |
| "epoch": 1.941031941031941, | |
| "grad_norm": 1.0742750813094744, | |
| "learning_rate": 3.424790423222455e-05, | |
| "loss": 0.1911, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19571363925933838, | |
| "step": 790, | |
| "valid_targets_mean": 575.7, | |
| "valid_targets_min": 333 | |
| }, | |
| { | |
| "epoch": 1.9533169533169534, | |
| "grad_norm": 1.1044557075952837, | |
| "learning_rate": 3.414719034460928e-05, | |
| "loss": 0.1911, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22668087482452393, | |
| "step": 795, | |
| "valid_targets_mean": 564.0, | |
| "valid_targets_min": 358 | |
| }, | |
| { | |
| "epoch": 1.9656019656019657, | |
| "grad_norm": 1.0816888460619751, | |
| "learning_rate": 3.404575327436294e-05, | |
| "loss": 0.1979, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2019120752811432, | |
| "step": 800, | |
| "valid_targets_mean": 579.1, | |
| "valid_targets_min": 368 | |
| }, | |
| { | |
| "epoch": 1.9778869778869779, | |
| "grad_norm": 0.9653532634042872, | |
| "learning_rate": 3.3943598206792665e-05, | |
| "loss": 0.1951, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1838877946138382, | |
| "step": 805, | |
| "valid_targets_mean": 610.0, | |
| "valid_targets_min": 383 | |
| }, | |
| { | |
| "epoch": 1.9901719901719903, | |
| "grad_norm": 1.1415332941821148, | |
| "learning_rate": 3.384073036390857e-05, | |
| "loss": 0.1965, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20956549048423767, | |
| "step": 810, | |
| "valid_targets_mean": 501.6, | |
| "valid_targets_min": 304 | |
| }, | |
| { | |
| "epoch": 2.0024570024570023, | |
| "grad_norm": 0.9598781701745787, | |
| "learning_rate": 3.373715500415667e-05, | |
| "loss": 0.1931, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1695285439491272, | |
| "step": 815, | |
| "valid_targets_mean": 518.5, | |
| "valid_targets_min": 333 | |
| }, | |
| { | |
| "epoch": 2.0147420147420148, | |
| "grad_norm": 1.0104897625706792, | |
| "learning_rate": 3.363287742215023e-05, | |
| "loss": 0.1674, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1609475016593933, | |
| "step": 820, | |
| "valid_targets_mean": 606.6, | |
| "valid_targets_min": 355 | |
| }, | |
| { | |
| "epoch": 2.027027027027027, | |
| "grad_norm": 1.1012228379960987, | |
| "learning_rate": 3.352790294839898e-05, | |
| "loss": 0.1721, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16471633315086365, | |
| "step": 825, | |
| "valid_targets_mean": 617.8, | |
| "valid_targets_min": 360 | |
| }, | |
| { | |
| "epoch": 2.039312039312039, | |
| "grad_norm": 1.0191591931441848, | |
| "learning_rate": 3.3422236949036726e-05, | |
| "loss": 0.1628, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1552569568157196, | |
| "step": 830, | |
| "valid_targets_mean": 604.6, | |
| "valid_targets_min": 405 | |
| }, | |
| { | |
| "epoch": 2.0515970515970516, | |
| "grad_norm": 0.9959292379606938, | |
| "learning_rate": 3.331588482554697e-05, | |
| "loss": 0.1633, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.165554940700531, | |
| "step": 835, | |
| "valid_targets_mean": 608.4, | |
| "valid_targets_min": 381 | |
| }, | |
| { | |
| "epoch": 2.063882063882064, | |
| "grad_norm": 0.9912133098541214, | |
| "learning_rate": 3.320885201448684e-05, | |
| "loss": 0.1711, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1614345908164978, | |
| "step": 840, | |
| "valid_targets_mean": 567.6, | |
| "valid_targets_min": 317 | |
| }, | |
| { | |
| "epoch": 2.076167076167076, | |
| "grad_norm": 1.310631256990444, | |
| "learning_rate": 3.310114398720917e-05, | |
| "loss": 0.1621, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17154039442539215, | |
| "step": 845, | |
| "valid_targets_mean": 599.4, | |
| "valid_targets_min": 411 | |
| }, | |
| { | |
| "epoch": 2.0884520884520885, | |
| "grad_norm": 1.1094181190105217, | |
| "learning_rate": 3.299276624958281e-05, | |
| "loss": 0.1673, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1730373501777649, | |
| "step": 850, | |
| "valid_targets_mean": 633.8, | |
| "valid_targets_min": 379 | |
| }, | |
| { | |
| "epoch": 2.100737100737101, | |
| "grad_norm": 1.0089696788731093, | |
| "learning_rate": 3.288372434171116e-05, | |
| "loss": 0.1621, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1542433649301529, | |
| "step": 855, | |
| "valid_targets_mean": 556.9, | |
| "valid_targets_min": 350 | |
| }, | |
| { | |
| "epoch": 2.113022113022113, | |
| "grad_norm": 1.1668954830382665, | |
| "learning_rate": 3.2774023837648986e-05, | |
| "loss": 0.1638, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18032445013523102, | |
| "step": 860, | |
| "valid_targets_mean": 595.8, | |
| "valid_targets_min": 372 | |
| }, | |
| { | |
| "epoch": 2.1253071253071254, | |
| "grad_norm": 0.9857434649722174, | |
| "learning_rate": 3.26636703451175e-05, | |
| "loss": 0.1642, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14477500319480896, | |
| "step": 865, | |
| "valid_targets_mean": 607.5, | |
| "valid_targets_min": 356 | |
| }, | |
| { | |
| "epoch": 2.1375921375921374, | |
| "grad_norm": 0.968780124957847, | |
| "learning_rate": 3.2552669505217646e-05, | |
| "loss": 0.1623, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15161213278770447, | |
| "step": 870, | |
| "valid_targets_mean": 597.6, | |
| "valid_targets_min": 360 | |
| }, | |
| { | |
| "epoch": 2.14987714987715, | |
| "grad_norm": 0.9765410163261161, | |
| "learning_rate": 3.24410269921418e-05, | |
| "loss": 0.1623, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14507704973220825, | |
| "step": 875, | |
| "valid_targets_mean": 593.9, | |
| "valid_targets_min": 417 | |
| }, | |
| { | |
| "epoch": 2.1621621621621623, | |
| "grad_norm": 1.0775768198373612, | |
| "learning_rate": 3.232874851288367e-05, | |
| "loss": 0.1691, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1555686593055725, | |
| "step": 880, | |
| "valid_targets_mean": 521.9, | |
| "valid_targets_min": 341 | |
| }, | |
| { | |
| "epoch": 2.1744471744471743, | |
| "grad_norm": 0.9514600306163379, | |
| "learning_rate": 3.221583980694659e-05, | |
| "loss": 0.1697, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1574643850326538, | |
| "step": 885, | |
| "valid_targets_mean": 628.7, | |
| "valid_targets_min": 357 | |
| }, | |
| { | |
| "epoch": 2.1867321867321867, | |
| "grad_norm": 1.2201470319941308, | |
| "learning_rate": 3.21023066460501e-05, | |
| "loss": 0.1635, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16941586136817932, | |
| "step": 890, | |
| "valid_targets_mean": 501.3, | |
| "valid_targets_min": 357 | |
| }, | |
| { | |
| "epoch": 2.199017199017199, | |
| "grad_norm": 0.9973297712090284, | |
| "learning_rate": 3.198815483383492e-05, | |
| "loss": 0.1661, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1548662930727005, | |
| "step": 895, | |
| "valid_targets_mean": 588.8, | |
| "valid_targets_min": 447 | |
| }, | |
| { | |
| "epoch": 2.211302211302211, | |
| "grad_norm": 1.109909348492073, | |
| "learning_rate": 3.1873390205566295e-05, | |
| "loss": 0.1625, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18065625429153442, | |
| "step": 900, | |
| "valid_targets_mean": 611.1, | |
| "valid_targets_min": 381 | |
| }, | |
| { | |
| "epoch": 2.2235872235872236, | |
| "grad_norm": 1.150607769729905, | |
| "learning_rate": 3.175801862783565e-05, | |
| "loss": 0.1676, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1760374903678894, | |
| "step": 905, | |
| "valid_targets_mean": 579.7, | |
| "valid_targets_min": 330 | |
| }, | |
| { | |
| "epoch": 2.235872235872236, | |
| "grad_norm": 1.0134504400046016, | |
| "learning_rate": 3.164204599826077e-05, | |
| "loss": 0.1679, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15869474411010742, | |
| "step": 910, | |
| "valid_targets_mean": 628.8, | |
| "valid_targets_min": 387 | |
| }, | |
| { | |
| "epoch": 2.248157248157248, | |
| "grad_norm": 1.03636879858501, | |
| "learning_rate": 3.1525478245184245e-05, | |
| "loss": 0.161, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17024025321006775, | |
| "step": 915, | |
| "valid_targets_mean": 638.8, | |
| "valid_targets_min": 338 | |
| }, | |
| { | |
| "epoch": 2.2604422604422605, | |
| "grad_norm": 1.027260075646475, | |
| "learning_rate": 3.140832132737051e-05, | |
| "loss": 0.1598, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16474905610084534, | |
| "step": 920, | |
| "valid_targets_mean": 634.4, | |
| "valid_targets_min": 351 | |
| }, | |
| { | |
| "epoch": 2.2727272727272725, | |
| "grad_norm": 1.1516883647794431, | |
| "learning_rate": 3.129058123370116e-05, | |
| "loss": 0.1655, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16940803825855255, | |
| "step": 925, | |
| "valid_targets_mean": 556.1, | |
| "valid_targets_min": 357 | |
| }, | |
| { | |
| "epoch": 2.285012285012285, | |
| "grad_norm": 0.9664843540102078, | |
| "learning_rate": 3.117226398286887e-05, | |
| "loss": 0.1602, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1558990180492401, | |
| "step": 930, | |
| "valid_targets_mean": 644.6, | |
| "valid_targets_min": 341 | |
| }, | |
| { | |
| "epoch": 2.2972972972972974, | |
| "grad_norm": 1.2097643368196167, | |
| "learning_rate": 3.105337562306968e-05, | |
| "loss": 0.1674, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16575542092323303, | |
| "step": 935, | |
| "valid_targets_mean": 512.9, | |
| "valid_targets_min": 375 | |
| }, | |
| { | |
| "epoch": 2.30958230958231, | |
| "grad_norm": 1.0251584186027345, | |
| "learning_rate": 3.0933922231693854e-05, | |
| "loss": 0.1607, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16751596331596375, | |
| "step": 940, | |
| "valid_targets_mean": 659.8, | |
| "valid_targets_min": 330 | |
| }, | |
| { | |
| "epoch": 2.321867321867322, | |
| "grad_norm": 1.8223152408400631, | |
| "learning_rate": 3.08139099150152e-05, | |
| "loss": 0.1648, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17123517394065857, | |
| "step": 945, | |
| "valid_targets_mean": 623.1, | |
| "valid_targets_min": 346 | |
| }, | |
| { | |
| "epoch": 2.3341523341523343, | |
| "grad_norm": 1.0197979025893136, | |
| "learning_rate": 3.069334480787893e-05, | |
| "loss": 0.1638, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1619846671819687, | |
| "step": 950, | |
| "valid_targets_mean": 516.6, | |
| "valid_targets_min": 360 | |
| }, | |
| { | |
| "epoch": 2.3464373464373462, | |
| "grad_norm": 1.0185481805967684, | |
| "learning_rate": 3.057223307338806e-05, | |
| "loss": 0.1587, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16147619485855103, | |
| "step": 955, | |
| "valid_targets_mean": 610.8, | |
| "valid_targets_min": 321 | |
| }, | |
| { | |
| "epoch": 2.3587223587223587, | |
| "grad_norm": 1.1629489871911958, | |
| "learning_rate": 3.0450580902588346e-05, | |
| "loss": 0.1675, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18816746771335602, | |
| "step": 960, | |
| "valid_targets_mean": 530.4, | |
| "valid_targets_min": 370 | |
| }, | |
| { | |
| "epoch": 2.371007371007371, | |
| "grad_norm": 0.9701315397422264, | |
| "learning_rate": 3.032839451415182e-05, | |
| "loss": 0.1623, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15979017317295074, | |
| "step": 965, | |
| "valid_targets_mean": 634.5, | |
| "valid_targets_min": 366 | |
| }, | |
| { | |
| "epoch": 2.383292383292383, | |
| "grad_norm": 0.99312578094489, | |
| "learning_rate": 3.0205680154058904e-05, | |
| "loss": 0.1573, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15236841142177582, | |
| "step": 970, | |
| "valid_targets_mean": 553.4, | |
| "valid_targets_min": 362 | |
| }, | |
| { | |
| "epoch": 2.3955773955773956, | |
| "grad_norm": 1.0612042490649, | |
| "learning_rate": 3.0082444095279117e-05, | |
| "loss": 0.1681, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17524561285972595, | |
| "step": 975, | |
| "valid_targets_mean": 595.1, | |
| "valid_targets_min": 377 | |
| }, | |
| { | |
| "epoch": 2.407862407862408, | |
| "grad_norm": 1.038886957377111, | |
| "learning_rate": 2.9958692637450406e-05, | |
| "loss": 0.1636, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.147619366645813, | |
| "step": 980, | |
| "valid_targets_mean": 583.9, | |
| "valid_targets_min": 369 | |
| }, | |
| { | |
| "epoch": 2.42014742014742, | |
| "grad_norm": 1.037085869783086, | |
| "learning_rate": 2.983443210655714e-05, | |
| "loss": 0.1584, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15583443641662598, | |
| "step": 985, | |
| "valid_targets_mean": 515.8, | |
| "valid_targets_min": 328 | |
| }, | |
| { | |
| "epoch": 2.4324324324324325, | |
| "grad_norm": 1.0353541133091022, | |
| "learning_rate": 2.9709668854606706e-05, | |
| "loss": 0.1705, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15763302147388458, | |
| "step": 990, | |
| "valid_targets_mean": 585.6, | |
| "valid_targets_min": 397 | |
| }, | |
| { | |
| "epoch": 2.444717444717445, | |
| "grad_norm": 1.022215789393668, | |
| "learning_rate": 2.9584409259304828e-05, | |
| "loss": 0.1747, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17135146260261536, | |
| "step": 995, | |
| "valid_targets_mean": 552.5, | |
| "valid_targets_min": 318 | |
| }, | |
| { | |
| "epoch": 2.457002457002457, | |
| "grad_norm": 1.1712513338241275, | |
| "learning_rate": 2.945865972372954e-05, | |
| "loss": 0.1699, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1690291315317154, | |
| "step": 1000, | |
| "valid_targets_mean": 564.1, | |
| "valid_targets_min": 362 | |
| }, | |
| { | |
| "epoch": 2.4692874692874693, | |
| "grad_norm": 1.0317993574285669, | |
| "learning_rate": 2.9332426676003858e-05, | |
| "loss": 0.1627, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14596088230609894, | |
| "step": 1005, | |
| "valid_targets_mean": 628.4, | |
| "valid_targets_min": 333 | |
| }, | |
| { | |
| "epoch": 2.4815724815724813, | |
| "grad_norm": 1.15263750619694, | |
| "learning_rate": 2.920571656896722e-05, | |
| "loss": 0.163, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1633550375699997, | |
| "step": 1010, | |
| "valid_targets_mean": 595.0, | |
| "valid_targets_min": 338 | |
| }, | |
| { | |
| "epoch": 2.493857493857494, | |
| "grad_norm": 1.196927272455729, | |
| "learning_rate": 2.907853587984558e-05, | |
| "loss": 0.1665, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18414945900440216, | |
| "step": 1015, | |
| "valid_targets_mean": 529.7, | |
| "valid_targets_min": 324 | |
| }, | |
| { | |
| "epoch": 2.506142506142506, | |
| "grad_norm": 1.008390669336626, | |
| "learning_rate": 2.8950891109920333e-05, | |
| "loss": 0.171, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17250725626945496, | |
| "step": 1020, | |
| "valid_targets_mean": 554.2, | |
| "valid_targets_min": 341 | |
| }, | |
| { | |
| "epoch": 2.5184275184275187, | |
| "grad_norm": 1.1830974634404077, | |
| "learning_rate": 2.882278878419597e-05, | |
| "loss": 0.1704, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17889085412025452, | |
| "step": 1025, | |
| "valid_targets_mean": 532.0, | |
| "valid_targets_min": 336 | |
| }, | |
| { | |
| "epoch": 2.5307125307125307, | |
| "grad_norm": 1.225171314277819, | |
| "learning_rate": 2.8694235451066538e-05, | |
| "loss": 0.1646, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1738959401845932, | |
| "step": 1030, | |
| "valid_targets_mean": 521.2, | |
| "valid_targets_min": 343 | |
| }, | |
| { | |
| "epoch": 2.542997542997543, | |
| "grad_norm": 1.0699171978912032, | |
| "learning_rate": 2.8565237681980876e-05, | |
| "loss": 0.1597, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1724793016910553, | |
| "step": 1035, | |
| "valid_targets_mean": 602.1, | |
| "valid_targets_min": 365 | |
| }, | |
| { | |
| "epoch": 2.555282555282555, | |
| "grad_norm": 0.9758834051911167, | |
| "learning_rate": 2.843580207110672e-05, | |
| "loss": 0.164, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1547844111919403, | |
| "step": 1040, | |
| "valid_targets_mean": 624.6, | |
| "valid_targets_min": 348 | |
| }, | |
| { | |
| "epoch": 2.5675675675675675, | |
| "grad_norm": 1.0648767118155311, | |
| "learning_rate": 2.830593523499361e-05, | |
| "loss": 0.1712, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17022374272346497, | |
| "step": 1045, | |
| "valid_targets_mean": 528.1, | |
| "valid_targets_min": 316 | |
| }, | |
| { | |
| "epoch": 2.57985257985258, | |
| "grad_norm": 1.1786460270152923, | |
| "learning_rate": 2.8175643812234627e-05, | |
| "loss": 0.1657, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17464269697666168, | |
| "step": 1050, | |
| "valid_targets_mean": 482.8, | |
| "valid_targets_min": 326 | |
| }, | |
| { | |
| "epoch": 2.592137592137592, | |
| "grad_norm": 1.015941200408563, | |
| "learning_rate": 2.8044934463127108e-05, | |
| "loss": 0.1635, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15763753652572632, | |
| "step": 1055, | |
| "valid_targets_mean": 595.5, | |
| "valid_targets_min": 415 | |
| }, | |
| { | |
| "epoch": 2.6044226044226044, | |
| "grad_norm": 1.5455311111736163, | |
| "learning_rate": 2.7913813869332112e-05, | |
| "loss": 0.1613, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16405543684959412, | |
| "step": 1060, | |
| "valid_targets_mean": 553.4, | |
| "valid_targets_min": 337 | |
| }, | |
| { | |
| "epoch": 2.616707616707617, | |
| "grad_norm": 1.0206262986883972, | |
| "learning_rate": 2.7782288733532915e-05, | |
| "loss": 0.1653, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15845853090286255, | |
| "step": 1065, | |
| "valid_targets_mean": 617.5, | |
| "valid_targets_min": 377 | |
| }, | |
| { | |
| "epoch": 2.628992628992629, | |
| "grad_norm": 1.0127994524563275, | |
| "learning_rate": 2.7650365779092346e-05, | |
| "loss": 0.1574, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15306022763252258, | |
| "step": 1070, | |
| "valid_targets_mean": 574.9, | |
| "valid_targets_min": 343 | |
| }, | |
| { | |
| "epoch": 2.6412776412776413, | |
| "grad_norm": 1.0920854825298392, | |
| "learning_rate": 2.751805174970912e-05, | |
| "loss": 0.1656, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1654532253742218, | |
| "step": 1075, | |
| "valid_targets_mean": 563.6, | |
| "valid_targets_min": 372 | |
| }, | |
| { | |
| "epoch": 2.6535626535626538, | |
| "grad_norm": 1.3337934562654803, | |
| "learning_rate": 2.7385353409073093e-05, | |
| "loss": 0.1684, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18576306104660034, | |
| "step": 1080, | |
| "valid_targets_mean": 563.4, | |
| "valid_targets_min": 340 | |
| }, | |
| { | |
| "epoch": 2.6658476658476657, | |
| "grad_norm": 1.1468407631418238, | |
| "learning_rate": 2.725227754051953e-05, | |
| "loss": 0.1625, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17222510278224945, | |
| "step": 1085, | |
| "valid_targets_mean": 532.2, | |
| "valid_targets_min": 330 | |
| }, | |
| { | |
| "epoch": 2.678132678132678, | |
| "grad_norm": 1.0145689177041828, | |
| "learning_rate": 2.711883094668234e-05, | |
| "loss": 0.1576, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16334547102451324, | |
| "step": 1090, | |
| "valid_targets_mean": 712.4, | |
| "valid_targets_min": 330 | |
| }, | |
| { | |
| "epoch": 2.69041769041769, | |
| "grad_norm": 1.0436018416176482, | |
| "learning_rate": 2.698502044914633e-05, | |
| "loss": 0.1675, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18585729598999023, | |
| "step": 1095, | |
| "valid_targets_mean": 637.7, | |
| "valid_targets_min": 358 | |
| }, | |
| { | |
| "epoch": 2.7027027027027026, | |
| "grad_norm": 1.065555873590499, | |
| "learning_rate": 2.685085288809853e-05, | |
| "loss": 0.1645, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17872926592826843, | |
| "step": 1100, | |
| "valid_targets_mean": 615.6, | |
| "valid_targets_min": 339 | |
| }, | |
| { | |
| "epoch": 2.714987714987715, | |
| "grad_norm": 1.105393129040058, | |
| "learning_rate": 2.671633512197848e-05, | |
| "loss": 0.165, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1631755530834198, | |
| "step": 1105, | |
| "valid_targets_mean": 546.8, | |
| "valid_targets_min": 318 | |
| }, | |
| { | |
| "epoch": 2.7272727272727275, | |
| "grad_norm": 1.1349764871912573, | |
| "learning_rate": 2.658147402712768e-05, | |
| "loss": 0.1717, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1780092418193817, | |
| "step": 1110, | |
| "valid_targets_mean": 549.3, | |
| "valid_targets_min": 308 | |
| }, | |
| { | |
| "epoch": 2.7395577395577395, | |
| "grad_norm": 1.0248807207449195, | |
| "learning_rate": 2.6446276497438064e-05, | |
| "loss": 0.1595, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1788291037082672, | |
| "step": 1115, | |
| "valid_targets_mean": 619.8, | |
| "valid_targets_min": 315 | |
| }, | |
| { | |
| "epoch": 2.751842751842752, | |
| "grad_norm": 1.0236466528288748, | |
| "learning_rate": 2.6310749443999593e-05, | |
| "loss": 0.167, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18527953326702118, | |
| "step": 1120, | |
| "valid_targets_mean": 704.7, | |
| "valid_targets_min": 386 | |
| }, | |
| { | |
| "epoch": 2.764127764127764, | |
| "grad_norm": 1.0161666743813242, | |
| "learning_rate": 2.617489979474699e-05, | |
| "loss": 0.159, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16755788028240204, | |
| "step": 1125, | |
| "valid_targets_mean": 635.2, | |
| "valid_targets_min": 327 | |
| }, | |
| { | |
| "epoch": 2.7764127764127764, | |
| "grad_norm": 1.1229342619607572, | |
| "learning_rate": 2.6038734494105562e-05, | |
| "loss": 0.1683, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17469602823257446, | |
| "step": 1130, | |
| "valid_targets_mean": 522.4, | |
| "valid_targets_min": 306 | |
| }, | |
| { | |
| "epoch": 2.788697788697789, | |
| "grad_norm": 1.1045159313327992, | |
| "learning_rate": 2.590226050263625e-05, | |
| "loss": 0.1604, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1623562127351761, | |
| "step": 1135, | |
| "valid_targets_mean": 559.4, | |
| "valid_targets_min": 340 | |
| }, | |
| { | |
| "epoch": 2.800982800982801, | |
| "grad_norm": 1.0512684811170392, | |
| "learning_rate": 2.5765484796679768e-05, | |
| "loss": 0.1664, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17947736382484436, | |
| "step": 1140, | |
| "valid_targets_mean": 643.2, | |
| "valid_targets_min": 343 | |
| }, | |
| { | |
| "epoch": 2.8132678132678133, | |
| "grad_norm": 1.0378515532484835, | |
| "learning_rate": 2.5628414368000035e-05, | |
| "loss": 0.1651, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15963560342788696, | |
| "step": 1145, | |
| "valid_targets_mean": 604.6, | |
| "valid_targets_min": 381 | |
| }, | |
| { | |
| "epoch": 2.8255528255528253, | |
| "grad_norm": 1.1752740327029623, | |
| "learning_rate": 2.5491056223426746e-05, | |
| "loss": 0.1685, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17332467436790466, | |
| "step": 1150, | |
| "valid_targets_mean": 508.6, | |
| "valid_targets_min": 329 | |
| }, | |
| { | |
| "epoch": 2.8378378378378377, | |
| "grad_norm": 1.0171284826487994, | |
| "learning_rate": 2.5353417384497166e-05, | |
| "loss": 0.1632, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1706382930278778, | |
| "step": 1155, | |
| "valid_targets_mean": 620.8, | |
| "valid_targets_min": 406 | |
| }, | |
| { | |
| "epoch": 2.85012285012285, | |
| "grad_norm": 1.1287447526542342, | |
| "learning_rate": 2.5215504887097243e-05, | |
| "loss": 0.1673, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19561892747879028, | |
| "step": 1160, | |
| "valid_targets_mean": 601.8, | |
| "valid_targets_min": 340 | |
| }, | |
| { | |
| "epoch": 2.8624078624078626, | |
| "grad_norm": 0.9610766649005913, | |
| "learning_rate": 2.5077325781101918e-05, | |
| "loss": 0.1594, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14945566654205322, | |
| "step": 1165, | |
| "valid_targets_mean": 570.4, | |
| "valid_targets_min": 342 | |
| }, | |
| { | |
| "epoch": 2.8746928746928746, | |
| "grad_norm": 1.0227531905951595, | |
| "learning_rate": 2.493888713001476e-05, | |
| "loss": 0.1673, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16126927733421326, | |
| "step": 1170, | |
| "valid_targets_mean": 642.7, | |
| "valid_targets_min": 359 | |
| }, | |
| { | |
| "epoch": 2.886977886977887, | |
| "grad_norm": 1.0837878704674906, | |
| "learning_rate": 2.480019601060687e-05, | |
| "loss": 0.1593, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17691218852996826, | |
| "step": 1175, | |
| "valid_targets_mean": 650.2, | |
| "valid_targets_min": 333 | |
| }, | |
| { | |
| "epoch": 2.899262899262899, | |
| "grad_norm": 1.1939077938787526, | |
| "learning_rate": 2.4661259512555176e-05, | |
| "loss": 0.1712, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17172303795814514, | |
| "step": 1180, | |
| "valid_targets_mean": 516.2, | |
| "valid_targets_min": 359 | |
| }, | |
| { | |
| "epoch": 2.9115479115479115, | |
| "grad_norm": 1.0876622788145698, | |
| "learning_rate": 2.4522084738079933e-05, | |
| "loss": 0.1724, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20079737901687622, | |
| "step": 1185, | |
| "valid_targets_mean": 650.1, | |
| "valid_targets_min": 347 | |
| }, | |
| { | |
| "epoch": 2.923832923832924, | |
| "grad_norm": 1.139247105803865, | |
| "learning_rate": 2.4382678801581762e-05, | |
| "loss": 0.1625, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15431401133537292, | |
| "step": 1190, | |
| "valid_targets_mean": 504.2, | |
| "valid_targets_min": 345 | |
| }, | |
| { | |
| "epoch": 2.9361179361179364, | |
| "grad_norm": 1.1904829689631444, | |
| "learning_rate": 2.4243048829277916e-05, | |
| "loss": 0.1646, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15002189576625824, | |
| "step": 1195, | |
| "valid_targets_mean": 496.9, | |
| "valid_targets_min": 359 | |
| }, | |
| { | |
| "epoch": 2.9484029484029484, | |
| "grad_norm": 1.0235188956010766, | |
| "learning_rate": 2.410320195883802e-05, | |
| "loss": 0.1628, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16833597421646118, | |
| "step": 1200, | |
| "valid_targets_mean": 663.0, | |
| "valid_targets_min": 341 | |
| }, | |
| { | |
| "epoch": 2.960687960687961, | |
| "grad_norm": 1.0386284807339494, | |
| "learning_rate": 2.396314533901918e-05, | |
| "loss": 0.1679, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1625211238861084, | |
| "step": 1205, | |
| "valid_targets_mean": 541.9, | |
| "valid_targets_min": 357 | |
| }, | |
| { | |
| "epoch": 2.972972972972973, | |
| "grad_norm": 1.0553260194827612, | |
| "learning_rate": 2.3822886129300603e-05, | |
| "loss": 0.1636, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16509494185447693, | |
| "step": 1210, | |
| "valid_targets_mean": 593.0, | |
| "valid_targets_min": 350 | |
| }, | |
| { | |
| "epoch": 2.9852579852579852, | |
| "grad_norm": 1.1291087525354948, | |
| "learning_rate": 2.368243149951755e-05, | |
| "loss": 0.1639, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16854259371757507, | |
| "step": 1215, | |
| "valid_targets_mean": 540.3, | |
| "valid_targets_min": 336 | |
| }, | |
| { | |
| "epoch": 2.9975429975429977, | |
| "grad_norm": 1.0280421294913595, | |
| "learning_rate": 2.3541788629494865e-05, | |
| "loss": 0.1668, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.154227614402771, | |
| "step": 1220, | |
| "valid_targets_mean": 537.2, | |
| "valid_targets_min": 353 | |
| }, | |
| { | |
| "epoch": 3.0098280098280097, | |
| "grad_norm": 0.9438151898624106, | |
| "learning_rate": 2.3400964708679944e-05, | |
| "loss": 0.1331, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11106042563915253, | |
| "step": 1225, | |
| "valid_targets_mean": 543.3, | |
| "valid_targets_min": 348 | |
| }, | |
| { | |
| "epoch": 3.022113022113022, | |
| "grad_norm": 1.3092340644616152, | |
| "learning_rate": 2.325996693577522e-05, | |
| "loss": 0.1314, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13556016981601715, | |
| "step": 1230, | |
| "valid_targets_mean": 569.0, | |
| "valid_targets_min": 344 | |
| }, | |
| { | |
| "epoch": 3.0343980343980346, | |
| "grad_norm": 1.1917250259707166, | |
| "learning_rate": 2.311880251837019e-05, | |
| "loss": 0.1298, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12731008231639862, | |
| "step": 1235, | |
| "valid_targets_mean": 621.3, | |
| "valid_targets_min": 363 | |
| }, | |
| { | |
| "epoch": 3.0466830466830466, | |
| "grad_norm": 1.171746419744826, | |
| "learning_rate": 2.2977478672572933e-05, | |
| "loss": 0.1348, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14058241248130798, | |
| "step": 1240, | |
| "valid_targets_mean": 583.0, | |
| "valid_targets_min": 330 | |
| }, | |
| { | |
| "epoch": 3.058968058968059, | |
| "grad_norm": 0.9850462514719055, | |
| "learning_rate": 2.2836002622641297e-05, | |
| "loss": 0.1246, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12502151727676392, | |
| "step": 1245, | |
| "valid_targets_mean": 655.2, | |
| "valid_targets_min": 426 | |
| }, | |
| { | |
| "epoch": 3.0712530712530715, | |
| "grad_norm": 1.1523919057407686, | |
| "learning_rate": 2.269438160061354e-05, | |
| "loss": 0.1286, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1243138313293457, | |
| "step": 1250, | |
| "valid_targets_mean": 538.6, | |
| "valid_targets_min": 298 | |
| }, | |
| { | |
| "epoch": 3.0835380835380835, | |
| "grad_norm": 1.1105751349907576, | |
| "learning_rate": 2.2552622845938698e-05, | |
| "loss": 0.1226, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11567364633083344, | |
| "step": 1255, | |
| "valid_targets_mean": 508.1, | |
| "valid_targets_min": 357 | |
| }, | |
| { | |
| "epoch": 3.095823095823096, | |
| "grad_norm": 1.1173952059466155, | |
| "learning_rate": 2.2410733605106462e-05, | |
| "loss": 0.1335, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14052309095859528, | |
| "step": 1260, | |
| "valid_targets_mean": 600.1, | |
| "valid_targets_min": 369 | |
| }, | |
| { | |
| "epoch": 3.108108108108108, | |
| "grad_norm": 1.133256035223599, | |
| "learning_rate": 2.2268721131276805e-05, | |
| "loss": 0.1306, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1453954577445984, | |
| "step": 1265, | |
| "valid_targets_mean": 594.8, | |
| "valid_targets_min": 364 | |
| }, | |
| { | |
| "epoch": 3.1203931203931203, | |
| "grad_norm": 1.045210794343584, | |
| "learning_rate": 2.2126592683909154e-05, | |
| "loss": 0.1228, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12265928834676743, | |
| "step": 1270, | |
| "valid_targets_mean": 683.3, | |
| "valid_targets_min": 386 | |
| }, | |
| { | |
| "epoch": 3.1326781326781328, | |
| "grad_norm": 1.2110743775006845, | |
| "learning_rate": 2.1984355528391342e-05, | |
| "loss": 0.1279, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13180884718894958, | |
| "step": 1275, | |
| "valid_targets_mean": 560.5, | |
| "valid_targets_min": 329 | |
| }, | |
| { | |
| "epoch": 3.1449631449631448, | |
| "grad_norm": 1.1434541255684711, | |
| "learning_rate": 2.1842016935668188e-05, | |
| "loss": 0.1282, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12741702795028687, | |
| "step": 1280, | |
| "valid_targets_mean": 549.2, | |
| "valid_targets_min": 379 | |
| }, | |
| { | |
| "epoch": 3.157248157248157, | |
| "grad_norm": 1.0439156376434802, | |
| "learning_rate": 2.169958418186982e-05, | |
| "loss": 0.1341, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12404695153236389, | |
| "step": 1285, | |
| "valid_targets_mean": 665.5, | |
| "valid_targets_min": 318 | |
| }, | |
| { | |
| "epoch": 3.1695331695331697, | |
| "grad_norm": 1.0124516604323626, | |
| "learning_rate": 2.1557064547939754e-05, | |
| "loss": 0.1315, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11568133533000946, | |
| "step": 1290, | |
| "valid_targets_mean": 624.2, | |
| "valid_targets_min": 350 | |
| }, | |
| { | |
| "epoch": 3.1818181818181817, | |
| "grad_norm": 1.0468568345799445, | |
| "learning_rate": 2.1414465319262666e-05, | |
| "loss": 0.1371, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12586909532546997, | |
| "step": 1295, | |
| "valid_targets_mean": 592.6, | |
| "valid_targets_min": 377 | |
| }, | |
| { | |
| "epoch": 3.194103194103194, | |
| "grad_norm": 1.2355339893235782, | |
| "learning_rate": 2.1271793785291997e-05, | |
| "loss": 0.1272, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12561175227165222, | |
| "step": 1300, | |
| "valid_targets_mean": 503.3, | |
| "valid_targets_min": 326 | |
| }, | |
| { | |
| "epoch": 3.2063882063882065, | |
| "grad_norm": 1.2213272421816683, | |
| "learning_rate": 2.1129057239177337e-05, | |
| "loss": 0.1316, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13759493827819824, | |
| "step": 1305, | |
| "valid_targets_mean": 592.8, | |
| "valid_targets_min": 336 | |
| }, | |
| { | |
| "epoch": 3.2186732186732185, | |
| "grad_norm": 1.1733266254515748, | |
| "learning_rate": 2.0986262977391577e-05, | |
| "loss": 0.1319, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1188465803861618, | |
| "step": 1310, | |
| "valid_targets_mean": 577.1, | |
| "valid_targets_min": 366 | |
| }, | |
| { | |
| "epoch": 3.230958230958231, | |
| "grad_norm": 1.0329034457428024, | |
| "learning_rate": 2.084341829935796e-05, | |
| "loss": 0.1312, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13047614693641663, | |
| "step": 1315, | |
| "valid_targets_mean": 648.7, | |
| "valid_targets_min": 429 | |
| }, | |
| { | |
| "epoch": 3.2432432432432434, | |
| "grad_norm": 1.0872058545288361, | |
| "learning_rate": 2.0700530507076916e-05, | |
| "loss": 0.1219, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11639399826526642, | |
| "step": 1320, | |
| "valid_targets_mean": 631.0, | |
| "valid_targets_min": 355 | |
| }, | |
| { | |
| "epoch": 3.2555282555282554, | |
| "grad_norm": 1.1820198984468047, | |
| "learning_rate": 2.0557606904752833e-05, | |
| "loss": 0.1278, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13082380592823029, | |
| "step": 1325, | |
| "valid_targets_mean": 574.2, | |
| "valid_targets_min": 365 | |
| }, | |
| { | |
| "epoch": 3.267813267813268, | |
| "grad_norm": 1.1630414180551119, | |
| "learning_rate": 2.0414654798420622e-05, | |
| "loss": 0.1221, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13212239742279053, | |
| "step": 1330, | |
| "valid_targets_mean": 576.9, | |
| "valid_targets_min": 300 | |
| }, | |
| { | |
| "epoch": 3.2800982800982803, | |
| "grad_norm": 1.0578455352391878, | |
| "learning_rate": 2.02716814955723e-05, | |
| "loss": 0.1269, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11416147649288177, | |
| "step": 1335, | |
| "valid_targets_mean": 588.4, | |
| "valid_targets_min": 375 | |
| }, | |
| { | |
| "epoch": 3.2923832923832923, | |
| "grad_norm": 1.1805958220382198, | |
| "learning_rate": 2.0128694304783406e-05, | |
| "loss": 0.1305, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1387619972229004, | |
| "step": 1340, | |
| "valid_targets_mean": 635.7, | |
| "valid_targets_min": 351 | |
| }, | |
| { | |
| "epoch": 3.3046683046683047, | |
| "grad_norm": 1.0482090682153697, | |
| "learning_rate": 1.9985700535339406e-05, | |
| "loss": 0.1296, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11787254363298416, | |
| "step": 1345, | |
| "valid_targets_mean": 583.8, | |
| "valid_targets_min": 351 | |
| }, | |
| { | |
| "epoch": 3.3169533169533167, | |
| "grad_norm": 1.1169879054755618, | |
| "learning_rate": 1.984270749686207e-05, | |
| "loss": 0.1332, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12067554891109467, | |
| "step": 1350, | |
| "valid_targets_mean": 516.5, | |
| "valid_targets_min": 333 | |
| }, | |
| { | |
| "epoch": 3.329238329238329, | |
| "grad_norm": 1.1551139226743778, | |
| "learning_rate": 1.9699722498935786e-05, | |
| "loss": 0.1347, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13348305225372314, | |
| "step": 1355, | |
| "valid_targets_mean": 618.8, | |
| "valid_targets_min": 333 | |
| }, | |
| { | |
| "epoch": 3.3415233415233416, | |
| "grad_norm": 1.0249399702817776, | |
| "learning_rate": 1.9556752850733933e-05, | |
| "loss": 0.1271, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11070968210697174, | |
| "step": 1360, | |
| "valid_targets_mean": 561.4, | |
| "valid_targets_min": 347 | |
| }, | |
| { | |
| "epoch": 3.3538083538083536, | |
| "grad_norm": 1.1499498208012953, | |
| "learning_rate": 1.9413805860645242e-05, | |
| "loss": 0.1286, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12810730934143066, | |
| "step": 1365, | |
| "valid_targets_mean": 703.3, | |
| "valid_targets_min": 311 | |
| }, | |
| { | |
| "epoch": 3.366093366093366, | |
| "grad_norm": 1.0876681376203914, | |
| "learning_rate": 1.9270888835900165e-05, | |
| "loss": 0.1296, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13525883853435516, | |
| "step": 1370, | |
| "valid_targets_mean": 606.2, | |
| "valid_targets_min": 345 | |
| }, | |
| { | |
| "epoch": 3.3783783783783785, | |
| "grad_norm": 1.225696579696641, | |
| "learning_rate": 1.9128009082197417e-05, | |
| "loss": 0.1287, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13166263699531555, | |
| "step": 1375, | |
| "valid_targets_mean": 592.9, | |
| "valid_targets_min": 378 | |
| }, | |
| { | |
| "epoch": 3.3906633906633905, | |
| "grad_norm": 1.066556068908003, | |
| "learning_rate": 1.8985173903330428e-05, | |
| "loss": 0.1292, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12290339171886444, | |
| "step": 1380, | |
| "valid_targets_mean": 688.7, | |
| "valid_targets_min": 357 | |
| }, | |
| { | |
| "epoch": 3.402948402948403, | |
| "grad_norm": 1.0572944355622975, | |
| "learning_rate": 1.884239060081407e-05, | |
| "loss": 0.1264, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12135250866413116, | |
| "step": 1385, | |
| "valid_targets_mean": 587.2, | |
| "valid_targets_min": 318 | |
| }, | |
| { | |
| "epoch": 3.4152334152334154, | |
| "grad_norm": 1.1838129653133687, | |
| "learning_rate": 1.869966647351135e-05, | |
| "loss": 0.1234, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12669309973716736, | |
| "step": 1390, | |
| "valid_targets_mean": 583.1, | |
| "valid_targets_min": 351 | |
| }, | |
| { | |
| "epoch": 3.4275184275184274, | |
| "grad_norm": 1.0319091113711576, | |
| "learning_rate": 1.8557008817260343e-05, | |
| "loss": 0.1306, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12052074819803238, | |
| "step": 1395, | |
| "valid_targets_mean": 611.9, | |
| "valid_targets_min": 335 | |
| }, | |
| { | |
| "epoch": 3.43980343980344, | |
| "grad_norm": 1.1399305429197617, | |
| "learning_rate": 1.8414424924501222e-05, | |
| "loss": 0.1301, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11919272691011429, | |
| "step": 1400, | |
| "valid_targets_mean": 465.2, | |
| "valid_targets_min": 344 | |
| }, | |
| { | |
| "epoch": 3.4520884520884523, | |
| "grad_norm": 1.177328920151525, | |
| "learning_rate": 1.827192208390347e-05, | |
| "loss": 0.1266, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12182799726724625, | |
| "step": 1405, | |
| "valid_targets_mean": 594.9, | |
| "valid_targets_min": 320 | |
| }, | |
| { | |
| "epoch": 3.4643734643734643, | |
| "grad_norm": 1.1035517940031225, | |
| "learning_rate": 1.812950757999334e-05, | |
| "loss": 0.1279, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12335000932216644, | |
| "step": 1410, | |
| "valid_targets_mean": 601.6, | |
| "valid_targets_min": 375 | |
| }, | |
| { | |
| "epoch": 3.4766584766584767, | |
| "grad_norm": 1.1686625404125646, | |
| "learning_rate": 1.7987188692781417e-05, | |
| "loss": 0.1275, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12197911739349365, | |
| "step": 1415, | |
| "valid_targets_mean": 518.9, | |
| "valid_targets_min": 372 | |
| }, | |
| { | |
| "epoch": 3.488943488943489, | |
| "grad_norm": 1.1409244395482991, | |
| "learning_rate": 1.784497269739052e-05, | |
| "loss": 0.1339, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12735916674137115, | |
| "step": 1420, | |
| "valid_targets_mean": 555.7, | |
| "valid_targets_min": 326 | |
| }, | |
| { | |
| "epoch": 3.501228501228501, | |
| "grad_norm": 1.15898956512608, | |
| "learning_rate": 1.770286686368381e-05, | |
| "loss": 0.1283, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13501927256584167, | |
| "step": 1425, | |
| "valid_targets_mean": 619.3, | |
| "valid_targets_min": 359 | |
| }, | |
| { | |
| "epoch": 3.5135135135135136, | |
| "grad_norm": 1.098433144324709, | |
| "learning_rate": 1.756087845589312e-05, | |
| "loss": 0.131, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12755000591278076, | |
| "step": 1430, | |
| "valid_targets_mean": 583.7, | |
| "valid_targets_min": 348 | |
| }, | |
| { | |
| "epoch": 3.5257985257985256, | |
| "grad_norm": 1.2285008083017597, | |
| "learning_rate": 1.7419014732247683e-05, | |
| "loss": 0.1323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1387604922056198, | |
| "step": 1435, | |
| "valid_targets_mean": 548.3, | |
| "valid_targets_min": 321 | |
| }, | |
| { | |
| "epoch": 3.538083538083538, | |
| "grad_norm": 1.2016331632631438, | |
| "learning_rate": 1.7277282944603047e-05, | |
| "loss": 0.1373, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1299329698085785, | |
| "step": 1440, | |
| "valid_targets_mean": 572.7, | |
| "valid_targets_min": 331 | |
| }, | |
| { | |
| "epoch": 3.5503685503685505, | |
| "grad_norm": 1.0798388503657812, | |
| "learning_rate": 1.713569033807041e-05, | |
| "loss": 0.1313, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12629565596580505, | |
| "step": 1445, | |
| "valid_targets_mean": 598.0, | |
| "valid_targets_min": 312 | |
| }, | |
| { | |
| "epoch": 3.562653562653563, | |
| "grad_norm": 1.1335346583884485, | |
| "learning_rate": 1.6994244150646244e-05, | |
| "loss": 0.1237, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1171446442604065, | |
| "step": 1450, | |
| "valid_targets_mean": 486.2, | |
| "valid_targets_min": 338 | |
| }, | |
| { | |
| "epoch": 3.574938574938575, | |
| "grad_norm": 1.026365044309373, | |
| "learning_rate": 1.6852951612842278e-05, | |
| "loss": 0.1238, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11981429159641266, | |
| "step": 1455, | |
| "valid_targets_mean": 735.9, | |
| "valid_targets_min": 392 | |
| }, | |
| { | |
| "epoch": 3.5872235872235874, | |
| "grad_norm": 1.1509255430926, | |
| "learning_rate": 1.671181994731595e-05, | |
| "loss": 0.1299, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12611302733421326, | |
| "step": 1460, | |
| "valid_targets_mean": 626.5, | |
| "valid_targets_min": 378 | |
| }, | |
| { | |
| "epoch": 3.5995085995085994, | |
| "grad_norm": 1.1055170772072278, | |
| "learning_rate": 1.6570856368501108e-05, | |
| "loss": 0.1339, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1377750188112259, | |
| "step": 1465, | |
| "valid_targets_mean": 683.5, | |
| "valid_targets_min": 388 | |
| }, | |
| { | |
| "epoch": 3.611793611793612, | |
| "grad_norm": 1.1283525993380816, | |
| "learning_rate": 1.643006808223931e-05, | |
| "loss": 0.1275, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11461061239242554, | |
| "step": 1470, | |
| "valid_targets_mean": 523.2, | |
| "valid_targets_min": 365 | |
| }, | |
| { | |
| "epoch": 3.6240786240786242, | |
| "grad_norm": 1.1522419980137233, | |
| "learning_rate": 1.6289462285411387e-05, | |
| "loss": 0.1257, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13068246841430664, | |
| "step": 1475, | |
| "valid_targets_mean": 668.4, | |
| "valid_targets_min": 385 | |
| }, | |
| { | |
| "epoch": 3.6363636363636362, | |
| "grad_norm": 1.1631017873745493, | |
| "learning_rate": 1.614904616556962e-05, | |
| "loss": 0.1281, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12972646951675415, | |
| "step": 1480, | |
| "valid_targets_mean": 511.1, | |
| "valid_targets_min": 319 | |
| }, | |
| { | |
| "epoch": 3.6486486486486487, | |
| "grad_norm": 1.0535715929600475, | |
| "learning_rate": 1.6008826900570294e-05, | |
| "loss": 0.126, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12002302706241608, | |
| "step": 1485, | |
| "valid_targets_mean": 619.9, | |
| "valid_targets_min": 305 | |
| }, | |
| { | |
| "epoch": 3.6609336609336607, | |
| "grad_norm": 1.0292067655871422, | |
| "learning_rate": 1.586881165820675e-05, | |
| "loss": 0.1284, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12251895666122437, | |
| "step": 1490, | |
| "valid_targets_mean": 545.2, | |
| "valid_targets_min": 336 | |
| }, | |
| { | |
| "epoch": 3.673218673218673, | |
| "grad_norm": 1.165710854153103, | |
| "learning_rate": 1.5729007595843037e-05, | |
| "loss": 0.1276, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1191115528345108, | |
| "step": 1495, | |
| "valid_targets_mean": 561.9, | |
| "valid_targets_min": 370 | |
| }, | |
| { | |
| "epoch": 3.6855036855036856, | |
| "grad_norm": 1.1331503839407906, | |
| "learning_rate": 1.5589421860047986e-05, | |
| "loss": 0.1325, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13271397352218628, | |
| "step": 1500, | |
| "valid_targets_mean": 619.1, | |
| "valid_targets_min": 343 | |
| }, | |
| { | |
| "epoch": 3.697788697788698, | |
| "grad_norm": 1.1461701235051638, | |
| "learning_rate": 1.5450061586229903e-05, | |
| "loss": 0.1309, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12879186868667603, | |
| "step": 1505, | |
| "valid_targets_mean": 600.9, | |
| "valid_targets_min": 371 | |
| }, | |
| { | |
| "epoch": 3.71007371007371, | |
| "grad_norm": 1.1847404529689227, | |
| "learning_rate": 1.5310933898271864e-05, | |
| "loss": 0.1259, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1365867555141449, | |
| "step": 1510, | |
| "valid_targets_mean": 611.1, | |
| "valid_targets_min": 340 | |
| }, | |
| { | |
| "epoch": 3.7223587223587224, | |
| "grad_norm": 1.092144235161736, | |
| "learning_rate": 1.5172045908167462e-05, | |
| "loss": 0.1321, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13623477518558502, | |
| "step": 1515, | |
| "valid_targets_mean": 634.0, | |
| "valid_targets_min": 367 | |
| }, | |
| { | |
| "epoch": 3.7346437346437344, | |
| "grad_norm": 1.1632897050883428, | |
| "learning_rate": 1.5033404715657344e-05, | |
| "loss": 0.1275, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12677055597305298, | |
| "step": 1520, | |
| "valid_targets_mean": 525.4, | |
| "valid_targets_min": 353 | |
| }, | |
| { | |
| "epoch": 3.746928746928747, | |
| "grad_norm": 1.1658235409021696, | |
| "learning_rate": 1.4895017407866217e-05, | |
| "loss": 0.1372, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14368900656700134, | |
| "step": 1525, | |
| "valid_targets_mean": 561.8, | |
| "valid_targets_min": 358 | |
| }, | |
| { | |
| "epoch": 3.7592137592137593, | |
| "grad_norm": 1.161265061332121, | |
| "learning_rate": 1.4756891058940606e-05, | |
| "loss": 0.1289, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12873947620391846, | |
| "step": 1530, | |
| "valid_targets_mean": 572.2, | |
| "valid_targets_min": 303 | |
| }, | |
| { | |
| "epoch": 3.7714987714987718, | |
| "grad_norm": 1.194746454201963, | |
| "learning_rate": 1.4619032729687223e-05, | |
| "loss": 0.1302, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12980486452579498, | |
| "step": 1535, | |
| "valid_targets_mean": 531.4, | |
| "valid_targets_min": 403 | |
| }, | |
| { | |
| "epoch": 3.7837837837837838, | |
| "grad_norm": 1.1787818795890501, | |
| "learning_rate": 1.4481449467212004e-05, | |
| "loss": 0.1325, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13438472151756287, | |
| "step": 1540, | |
| "valid_targets_mean": 502.7, | |
| "valid_targets_min": 337 | |
| }, | |
| { | |
| "epoch": 3.796068796068796, | |
| "grad_norm": 1.1499950845573694, | |
| "learning_rate": 1.4344148304559926e-05, | |
| "loss": 0.1333, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1443779170513153, | |
| "step": 1545, | |
| "valid_targets_mean": 646.7, | |
| "valid_targets_min": 396 | |
| }, | |
| { | |
| "epoch": 3.808353808353808, | |
| "grad_norm": 1.1616430554436303, | |
| "learning_rate": 1.4207136260355426e-05, | |
| "loss": 0.1265, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1254158318042755, | |
| "step": 1550, | |
| "valid_targets_mean": 638.1, | |
| "valid_targets_min": 338 | |
| }, | |
| { | |
| "epoch": 3.8206388206388207, | |
| "grad_norm": 1.132424544684703, | |
| "learning_rate": 1.4070420338443667e-05, | |
| "loss": 0.1298, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13349393010139465, | |
| "step": 1555, | |
| "valid_targets_mean": 654.6, | |
| "valid_targets_min": 327 | |
| }, | |
| { | |
| "epoch": 3.832923832923833, | |
| "grad_norm": 1.3001413881701709, | |
| "learning_rate": 1.3934007527532494e-05, | |
| "loss": 0.1296, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13770049810409546, | |
| "step": 1560, | |
| "valid_targets_mean": 481.9, | |
| "valid_targets_min": 336 | |
| }, | |
| { | |
| "epoch": 3.845208845208845, | |
| "grad_norm": 1.1715428722278898, | |
| "learning_rate": 1.3797904800835174e-05, | |
| "loss": 0.1239, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12449394166469574, | |
| "step": 1565, | |
| "valid_targets_mean": 480.0, | |
| "valid_targets_min": 356 | |
| }, | |
| { | |
| "epoch": 3.8574938574938575, | |
| "grad_norm": 1.0380113852952901, | |
| "learning_rate": 1.3662119115713968e-05, | |
| "loss": 0.1237, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11352840811014175, | |
| "step": 1570, | |
| "valid_targets_mean": 607.8, | |
| "valid_targets_min": 358 | |
| }, | |
| { | |
| "epoch": 3.8697788697788695, | |
| "grad_norm": 1.0491947528953767, | |
| "learning_rate": 1.3526657413324427e-05, | |
| "loss": 0.1263, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10872411727905273, | |
| "step": 1575, | |
| "valid_targets_mean": 613.9, | |
| "valid_targets_min": 346 | |
| }, | |
| { | |
| "epoch": 3.882063882063882, | |
| "grad_norm": 1.1342755535198472, | |
| "learning_rate": 1.3391526618260636e-05, | |
| "loss": 0.1319, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13144788146018982, | |
| "step": 1580, | |
| "valid_targets_mean": 571.2, | |
| "valid_targets_min": 396 | |
| }, | |
| { | |
| "epoch": 3.8943488943488944, | |
| "grad_norm": 1.2509197238722758, | |
| "learning_rate": 1.3256733638201172e-05, | |
| "loss": 0.1329, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14529390633106232, | |
| "step": 1585, | |
| "valid_targets_mean": 546.2, | |
| "valid_targets_min": 328 | |
| }, | |
| { | |
| "epoch": 3.906633906633907, | |
| "grad_norm": 0.9973094279197666, | |
| "learning_rate": 1.3122285363556053e-05, | |
| "loss": 0.1251, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11257536709308624, | |
| "step": 1590, | |
| "valid_targets_mean": 564.9, | |
| "valid_targets_min": 374 | |
| }, | |
| { | |
| "epoch": 3.918918918918919, | |
| "grad_norm": 1.2025598432808287, | |
| "learning_rate": 1.2988188667114487e-05, | |
| "loss": 0.1255, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13427409529685974, | |
| "step": 1595, | |
| "valid_targets_mean": 562.8, | |
| "valid_targets_min": 315 | |
| }, | |
| { | |
| "epoch": 3.9312039312039313, | |
| "grad_norm": 1.1529440857186466, | |
| "learning_rate": 1.2854450403693526e-05, | |
| "loss": 0.1296, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1339278221130371, | |
| "step": 1600, | |
| "valid_targets_mean": 503.7, | |
| "valid_targets_min": 339 | |
| }, | |
| { | |
| "epoch": 3.9434889434889433, | |
| "grad_norm": 1.0860172451290426, | |
| "learning_rate": 1.272107740978769e-05, | |
| "loss": 0.1278, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1276274025440216, | |
| "step": 1605, | |
| "valid_targets_mean": 653.6, | |
| "valid_targets_min": 320 | |
| }, | |
| { | |
| "epoch": 3.9557739557739557, | |
| "grad_norm": 1.1427290885983061, | |
| "learning_rate": 1.2588076503219475e-05, | |
| "loss": 0.1282, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12042656540870667, | |
| "step": 1610, | |
| "valid_targets_mean": 583.0, | |
| "valid_targets_min": 344 | |
| }, | |
| { | |
| "epoch": 3.968058968058968, | |
| "grad_norm": 1.1290073089466048, | |
| "learning_rate": 1.2455454482790859e-05, | |
| "loss": 0.1253, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13513417541980743, | |
| "step": 1615, | |
| "valid_targets_mean": 533.4, | |
| "valid_targets_min": 318 | |
| }, | |
| { | |
| "epoch": 3.98034398034398, | |
| "grad_norm": 1.1928783790551887, | |
| "learning_rate": 1.2323218127935714e-05, | |
| "loss": 0.1251, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1335742324590683, | |
| "step": 1620, | |
| "valid_targets_mean": 557.1, | |
| "valid_targets_min": 368 | |
| }, | |
| { | |
| "epoch": 3.9926289926289926, | |
| "grad_norm": 1.150530284349401, | |
| "learning_rate": 1.2191374198373309e-05, | |
| "loss": 0.1271, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12559539079666138, | |
| "step": 1625, | |
| "valid_targets_mean": 588.1, | |
| "valid_targets_min": 317 | |
| }, | |
| { | |
| "epoch": 4.004914004914005, | |
| "grad_norm": 1.0048824233082845, | |
| "learning_rate": 1.2059929433762734e-05, | |
| "loss": 0.12, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10455832630395889, | |
| "step": 1630, | |
| "valid_targets_mean": 675.5, | |
| "valid_targets_min": 341 | |
| }, | |
| { | |
| "epoch": 4.017199017199017, | |
| "grad_norm": 1.163545632299262, | |
| "learning_rate": 1.1928890553358352e-05, | |
| "loss": 0.1024, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10858234018087387, | |
| "step": 1635, | |
| "valid_targets_mean": 502.5, | |
| "valid_targets_min": 365 | |
| }, | |
| { | |
| "epoch": 4.0294840294840295, | |
| "grad_norm": 1.2412134097978402, | |
| "learning_rate": 1.1798264255666387e-05, | |
| "loss": 0.1027, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12209483981132507, | |
| "step": 1640, | |
| "valid_targets_mean": 621.5, | |
| "valid_targets_min": 387 | |
| }, | |
| { | |
| "epoch": 4.041769041769042, | |
| "grad_norm": 1.3264719047529132, | |
| "learning_rate": 1.1668057218102436e-05, | |
| "loss": 0.1032, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10184046626091003, | |
| "step": 1645, | |
| "valid_targets_mean": 567.1, | |
| "valid_targets_min": 327 | |
| }, | |
| { | |
| "epoch": 4.054054054054054, | |
| "grad_norm": 1.2340618039253701, | |
| "learning_rate": 1.1538276096650175e-05, | |
| "loss": 0.1013, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09597574919462204, | |
| "step": 1650, | |
| "valid_targets_mean": 625.8, | |
| "valid_targets_min": 406 | |
| }, | |
| { | |
| "epoch": 4.066339066339066, | |
| "grad_norm": 1.1327237171212756, | |
| "learning_rate": 1.1408927525521118e-05, | |
| "loss": 0.0984, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09209711849689484, | |
| "step": 1655, | |
| "valid_targets_mean": 571.6, | |
| "valid_targets_min": 347 | |
| }, | |
| { | |
| "epoch": 4.078624078624078, | |
| "grad_norm": 1.1342526879934762, | |
| "learning_rate": 1.1280018116815438e-05, | |
| "loss": 0.0993, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09200088679790497, | |
| "step": 1660, | |
| "valid_targets_mean": 566.7, | |
| "valid_targets_min": 330 | |
| }, | |
| { | |
| "epoch": 4.090909090909091, | |
| "grad_norm": 1.2009691446799167, | |
| "learning_rate": 1.115155446018404e-05, | |
| "loss": 0.0998, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08615422993898392, | |
| "step": 1665, | |
| "valid_targets_mean": 539.9, | |
| "valid_targets_min": 347 | |
| }, | |
| { | |
| "epoch": 4.103194103194103, | |
| "grad_norm": 1.2104780895909246, | |
| "learning_rate": 1.1023543122491626e-05, | |
| "loss": 0.0972, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10453509539365768, | |
| "step": 1670, | |
| "valid_targets_mean": 530.2, | |
| "valid_targets_min": 357 | |
| }, | |
| { | |
| "epoch": 4.115479115479116, | |
| "grad_norm": 1.2910856332936864, | |
| "learning_rate": 1.089599064748108e-05, | |
| "loss": 0.1014, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09106092900037766, | |
| "step": 1675, | |
| "valid_targets_mean": 607.2, | |
| "valid_targets_min": 347 | |
| }, | |
| { | |
| "epoch": 4.127764127764128, | |
| "grad_norm": 1.230623396109194, | |
| "learning_rate": 1.0768903555438927e-05, | |
| "loss": 0.1002, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11302720755338669, | |
| "step": 1680, | |
| "valid_targets_mean": 638.8, | |
| "valid_targets_min": 314 | |
| }, | |
| { | |
| "epoch": 4.14004914004914, | |
| "grad_norm": 1.071186203200074, | |
| "learning_rate": 1.0642288342862007e-05, | |
| "loss": 0.097, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09703318774700165, | |
| "step": 1685, | |
| "valid_targets_mean": 661.2, | |
| "valid_targets_min": 434 | |
| }, | |
| { | |
| "epoch": 4.152334152334152, | |
| "grad_norm": 1.0873993018103425, | |
| "learning_rate": 1.051615148212544e-05, | |
| "loss": 0.0981, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09849526733160019, | |
| "step": 1690, | |
| "valid_targets_mean": 622.8, | |
| "valid_targets_min": 390 | |
| }, | |
| { | |
| "epoch": 4.164619164619165, | |
| "grad_norm": 1.1263995338301178, | |
| "learning_rate": 1.0390499421151706e-05, | |
| "loss": 0.0987, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09109313786029816, | |
| "step": 1695, | |
| "valid_targets_mean": 578.1, | |
| "valid_targets_min": 355 | |
| }, | |
| { | |
| "epoch": 4.176904176904177, | |
| "grad_norm": 1.0985910939618122, | |
| "learning_rate": 1.0265338583081088e-05, | |
| "loss": 0.0984, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08648423850536346, | |
| "step": 1700, | |
| "valid_targets_mean": 595.2, | |
| "valid_targets_min": 364 | |
| }, | |
| { | |
| "epoch": 4.1891891891891895, | |
| "grad_norm": 1.1590572484293804, | |
| "learning_rate": 1.0140675365943284e-05, | |
| "loss": 0.1003, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09820704907178879, | |
| "step": 1705, | |
| "valid_targets_mean": 609.7, | |
| "valid_targets_min": 338 | |
| }, | |
| { | |
| "epoch": 4.201474201474202, | |
| "grad_norm": 1.1427962066433188, | |
| "learning_rate": 1.0016516142330404e-05, | |
| "loss": 0.1038, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09883490204811096, | |
| "step": 1710, | |
| "valid_targets_mean": 652.7, | |
| "valid_targets_min": 360 | |
| }, | |
| { | |
| "epoch": 4.2137592137592135, | |
| "grad_norm": 1.2229775700196275, | |
| "learning_rate": 9.89286725907117e-06, | |
| "loss": 0.1029, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09851114451885223, | |
| "step": 1715, | |
| "valid_targets_mean": 616.8, | |
| "valid_targets_min": 335 | |
| }, | |
| { | |
| "epoch": 4.226044226044226, | |
| "grad_norm": 1.2892267743946126, | |
| "learning_rate": 9.769735036906475e-06, | |
| "loss": 0.1015, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.094369076192379, | |
| "step": 1720, | |
| "valid_targets_mean": 535.9, | |
| "valid_targets_min": 371 | |
| }, | |
| { | |
| "epoch": 4.238329238329238, | |
| "grad_norm": 1.1935112749649825, | |
| "learning_rate": 9.647125770166321e-06, | |
| "loss": 0.1034, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10198774188756943, | |
| "step": 1725, | |
| "valid_targets_mean": 545.2, | |
| "valid_targets_min": 350 | |
| }, | |
| { | |
| "epoch": 4.250614250614251, | |
| "grad_norm": 1.2238824327091253, | |
| "learning_rate": 9.525045726448001e-06, | |
| "loss": 0.1022, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10707564651966095, | |
| "step": 1730, | |
| "valid_targets_mean": 562.8, | |
| "valid_targets_min": 352 | |
| }, | |
| { | |
| "epoch": 4.262899262899263, | |
| "grad_norm": 1.2437855878561452, | |
| "learning_rate": 9.40350114629577e-06, | |
| "loss": 0.0999, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10580414533615112, | |
| "step": 1735, | |
| "valid_targets_mean": 605.5, | |
| "valid_targets_min": 366 | |
| }, | |
| { | |
| "epoch": 4.275184275184275, | |
| "grad_norm": 1.2388603553940354, | |
| "learning_rate": 9.282498242881784e-06, | |
| "loss": 0.1007, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10578162968158722, | |
| "step": 1740, | |
| "valid_targets_mean": 588.9, | |
| "valid_targets_min": 364 | |
| }, | |
| { | |
| "epoch": 4.287469287469287, | |
| "grad_norm": 1.1790865419807355, | |
| "learning_rate": 9.162043201688517e-06, | |
| "loss": 0.0992, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.105898916721344, | |
| "step": 1745, | |
| "valid_targets_mean": 573.7, | |
| "valid_targets_min": 304 | |
| }, | |
| { | |
| "epoch": 4.2997542997543, | |
| "grad_norm": 1.2072953523804615, | |
| "learning_rate": 9.042142180192596e-06, | |
| "loss": 0.0969, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10502354800701141, | |
| "step": 1750, | |
| "valid_targets_mean": 618.2, | |
| "valid_targets_min": 311 | |
| }, | |
| { | |
| "epoch": 4.312039312039312, | |
| "grad_norm": 1.2456975643832313, | |
| "learning_rate": 8.92280130754998e-06, | |
| "loss": 0.0978, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10343105345964432, | |
| "step": 1755, | |
| "valid_targets_mean": 583.5, | |
| "valid_targets_min": 367 | |
| }, | |
| { | |
| "epoch": 4.324324324324325, | |
| "grad_norm": 1.1047621262208223, | |
| "learning_rate": 8.804026684282694e-06, | |
| "loss": 0.1017, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09652222692966461, | |
| "step": 1760, | |
| "valid_targets_mean": 556.1, | |
| "valid_targets_min": 353 | |
| }, | |
| { | |
| "epoch": 4.336609336609337, | |
| "grad_norm": 1.1690242316360768, | |
| "learning_rate": 8.685824381966975e-06, | |
| "loss": 0.0998, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1021624505519867, | |
| "step": 1765, | |
| "valid_targets_mean": 600.4, | |
| "valid_targets_min": 376 | |
| }, | |
| { | |
| "epoch": 4.348894348894349, | |
| "grad_norm": 1.168482386851969, | |
| "learning_rate": 8.568200442922865e-06, | |
| "loss": 0.098, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09414025396108627, | |
| "step": 1770, | |
| "valid_targets_mean": 533.4, | |
| "valid_targets_min": 353 | |
| }, | |
| { | |
| "epoch": 4.361179361179361, | |
| "grad_norm": 1.2014191056193684, | |
| "learning_rate": 8.451160879905398e-06, | |
| "loss": 0.1044, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11013706028461456, | |
| "step": 1775, | |
| "valid_targets_mean": 574.8, | |
| "valid_targets_min": 358 | |
| }, | |
| { | |
| "epoch": 4.3734643734643734, | |
| "grad_norm": 1.3497231211942646, | |
| "learning_rate": 8.33471167579717e-06, | |
| "loss": 0.0998, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11065986752510071, | |
| "step": 1780, | |
| "valid_targets_mean": 560.4, | |
| "valid_targets_min": 338 | |
| }, | |
| { | |
| "epoch": 4.385749385749386, | |
| "grad_norm": 1.1746303621149712, | |
| "learning_rate": 8.218858783302566e-06, | |
| "loss": 0.1058, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10344547033309937, | |
| "step": 1785, | |
| "valid_targets_mean": 543.9, | |
| "valid_targets_min": 365 | |
| }, | |
| { | |
| "epoch": 4.398034398034398, | |
| "grad_norm": 1.2044261375281227, | |
| "learning_rate": 8.103608124643412e-06, | |
| "loss": 0.1008, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09899267554283142, | |
| "step": 1790, | |
| "valid_targets_mean": 519.8, | |
| "valid_targets_min": 343 | |
| }, | |
| { | |
| "epoch": 4.41031941031941, | |
| "grad_norm": 1.2175130594542445, | |
| "learning_rate": 7.988965591256284e-06, | |
| "loss": 0.1021, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10632935166358948, | |
| "step": 1795, | |
| "valid_targets_mean": 628.6, | |
| "valid_targets_min": 325 | |
| }, | |
| { | |
| "epoch": 4.422604422604422, | |
| "grad_norm": 1.2792552447352683, | |
| "learning_rate": 7.874937043491331e-06, | |
| "loss": 0.1047, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10912451148033142, | |
| "step": 1800, | |
| "valid_targets_mean": 513.6, | |
| "valid_targets_min": 377 | |
| }, | |
| { | |
| "epoch": 4.434889434889435, | |
| "grad_norm": 1.182229707392635, | |
| "learning_rate": 7.761528310312679e-06, | |
| "loss": 0.1008, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09684855490922928, | |
| "step": 1805, | |
| "valid_targets_mean": 550.6, | |
| "valid_targets_min": 338 | |
| }, | |
| { | |
| "epoch": 4.447174447174447, | |
| "grad_norm": 1.2263550156104428, | |
| "learning_rate": 7.648745189000511e-06, | |
| "loss": 0.1001, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10374526679515839, | |
| "step": 1810, | |
| "valid_targets_mean": 553.9, | |
| "valid_targets_min": 310 | |
| }, | |
| { | |
| "epoch": 4.45945945945946, | |
| "grad_norm": 1.1731823115544264, | |
| "learning_rate": 7.536593444854663e-06, | |
| "loss": 0.1034, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10558365285396576, | |
| "step": 1815, | |
| "valid_targets_mean": 633.2, | |
| "valid_targets_min": 338 | |
| }, | |
| { | |
| "epoch": 4.471744471744472, | |
| "grad_norm": 1.1474916322105462, | |
| "learning_rate": 7.4250788108999686e-06, | |
| "loss": 0.1001, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09885166585445404, | |
| "step": 1820, | |
| "valid_targets_mean": 717.4, | |
| "valid_targets_min": 377 | |
| }, | |
| { | |
| "epoch": 4.484029484029484, | |
| "grad_norm": 1.094647206055445, | |
| "learning_rate": 7.314206987593162e-06, | |
| "loss": 0.1036, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10133668780326843, | |
| "step": 1825, | |
| "valid_targets_mean": 636.3, | |
| "valid_targets_min": 329 | |
| }, | |
| { | |
| "epoch": 4.496314496314496, | |
| "grad_norm": 1.1905531326550673, | |
| "learning_rate": 7.203983642531462e-06, | |
| "loss": 0.1006, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09761211276054382, | |
| "step": 1830, | |
| "valid_targets_mean": 589.3, | |
| "valid_targets_min": 378 | |
| }, | |
| { | |
| "epoch": 4.5085995085995085, | |
| "grad_norm": 1.231956473792296, | |
| "learning_rate": 7.094414410162913e-06, | |
| "loss": 0.1075, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12320096790790558, | |
| "step": 1835, | |
| "valid_targets_mean": 671.7, | |
| "valid_targets_min": 318 | |
| }, | |
| { | |
| "epoch": 4.520884520884521, | |
| "grad_norm": 1.1916369852456652, | |
| "learning_rate": 6.985504891498291e-06, | |
| "loss": 0.103, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09794484078884125, | |
| "step": 1840, | |
| "valid_targets_mean": 506.5, | |
| "valid_targets_min": 348 | |
| }, | |
| { | |
| "epoch": 4.533169533169533, | |
| "grad_norm": 1.1392602085497459, | |
| "learning_rate": 6.8772606538248285e-06, | |
| "loss": 0.1007, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0966009795665741, | |
| "step": 1845, | |
| "valid_targets_mean": 601.4, | |
| "valid_targets_min": 377 | |
| }, | |
| { | |
| "epoch": 4.545454545454545, | |
| "grad_norm": 1.3903615271877925, | |
| "learning_rate": 6.769687230421638e-06, | |
| "loss": 0.1052, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11042024195194244, | |
| "step": 1850, | |
| "valid_targets_mean": 554.5, | |
| "valid_targets_min": 278 | |
| }, | |
| { | |
| "epoch": 4.557739557739557, | |
| "grad_norm": 1.2300336106127345, | |
| "learning_rate": 6.662790120276803e-06, | |
| "loss": 0.1027, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09811799973249435, | |
| "step": 1855, | |
| "valid_targets_mean": 534.4, | |
| "valid_targets_min": 333 | |
| }, | |
| { | |
| "epoch": 4.57002457002457, | |
| "grad_norm": 1.2200961784026068, | |
| "learning_rate": 6.556574787806344e-06, | |
| "loss": 0.1013, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10353550314903259, | |
| "step": 1860, | |
| "valid_targets_mean": 529.3, | |
| "valid_targets_min": 340 | |
| }, | |
| { | |
| "epoch": 4.582309582309582, | |
| "grad_norm": 1.1625800754502562, | |
| "learning_rate": 6.451046662574831e-06, | |
| "loss": 0.1009, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10336748510599136, | |
| "step": 1865, | |
| "valid_targets_mean": 633.1, | |
| "valid_targets_min": 419 | |
| }, | |
| { | |
| "epoch": 4.594594594594595, | |
| "grad_norm": 1.2713762384531742, | |
| "learning_rate": 6.346211139017877e-06, | |
| "loss": 0.1012, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10492067039012909, | |
| "step": 1870, | |
| "valid_targets_mean": 561.4, | |
| "valid_targets_min": 327 | |
| }, | |
| { | |
| "epoch": 4.606879606879607, | |
| "grad_norm": 1.3231558835585264, | |
| "learning_rate": 6.242073576166337e-06, | |
| "loss": 0.1015, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10041987895965576, | |
| "step": 1875, | |
| "valid_targets_mean": 548.6, | |
| "valid_targets_min": 376 | |
| }, | |
| { | |
| "epoch": 4.61916461916462, | |
| "grad_norm": 1.2601382420703833, | |
| "learning_rate": 6.138639297372404e-06, | |
| "loss": 0.0959, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09813426434993744, | |
| "step": 1880, | |
| "valid_targets_mean": 577.2, | |
| "valid_targets_min": 376 | |
| }, | |
| { | |
| "epoch": 4.631449631449631, | |
| "grad_norm": 1.2257187798744478, | |
| "learning_rate": 6.035913590037479e-06, | |
| "loss": 0.0998, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11072864383459091, | |
| "step": 1885, | |
| "valid_targets_mean": 576.0, | |
| "valid_targets_min": 315 | |
| }, | |
| { | |
| "epoch": 4.643734643734644, | |
| "grad_norm": 1.2004615205444573, | |
| "learning_rate": 5.933901705341851e-06, | |
| "loss": 0.1016, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09538184106349945, | |
| "step": 1890, | |
| "valid_targets_mean": 589.8, | |
| "valid_targets_min": 380 | |
| }, | |
| { | |
| "epoch": 4.656019656019656, | |
| "grad_norm": 1.1868393241698096, | |
| "learning_rate": 5.832608857976321e-06, | |
| "loss": 0.095, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10105034708976746, | |
| "step": 1895, | |
| "valid_targets_mean": 611.6, | |
| "valid_targets_min": 385 | |
| }, | |
| { | |
| "epoch": 4.6683046683046685, | |
| "grad_norm": 1.3081946253917116, | |
| "learning_rate": 5.732040225875584e-06, | |
| "loss": 0.1022, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12059606611728668, | |
| "step": 1900, | |
| "valid_targets_mean": 560.9, | |
| "valid_targets_min": 354 | |
| }, | |
| { | |
| "epoch": 4.680589680589681, | |
| "grad_norm": 1.2987377377767717, | |
| "learning_rate": 5.632200949953579e-06, | |
| "loss": 0.1008, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10647489130496979, | |
| "step": 1905, | |
| "valid_targets_mean": 531.2, | |
| "valid_targets_min": 355 | |
| }, | |
| { | |
| "epoch": 4.6928746928746925, | |
| "grad_norm": 1.3291852046527648, | |
| "learning_rate": 5.533096133840677e-06, | |
| "loss": 0.0966, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09767606109380722, | |
| "step": 1910, | |
| "valid_targets_mean": 544.9, | |
| "valid_targets_min": 330 | |
| }, | |
| { | |
| "epoch": 4.705159705159705, | |
| "grad_norm": 1.0561152027218779, | |
| "learning_rate": 5.434730843622778e-06, | |
| "loss": 0.0929, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0867503434419632, | |
| "step": 1915, | |
| "valid_targets_mean": 566.1, | |
| "valid_targets_min": 385 | |
| }, | |
| { | |
| "epoch": 4.717444717444717, | |
| "grad_norm": 1.1538215958161149, | |
| "learning_rate": 5.337110107582377e-06, | |
| "loss": 0.0953, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09404034167528152, | |
| "step": 1920, | |
| "valid_targets_mean": 560.6, | |
| "valid_targets_min": 393 | |
| }, | |
| { | |
| "epoch": 4.72972972972973, | |
| "grad_norm": 1.1289210852598999, | |
| "learning_rate": 5.2402389159414755e-06, | |
| "loss": 0.1005, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10088243335485458, | |
| "step": 1925, | |
| "valid_targets_mean": 601.7, | |
| "valid_targets_min": 391 | |
| }, | |
| { | |
| "epoch": 4.742014742014742, | |
| "grad_norm": 1.1358514090172158, | |
| "learning_rate": 5.144122220606542e-06, | |
| "loss": 0.0945, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0944918692111969, | |
| "step": 1930, | |
| "valid_targets_mean": 662.3, | |
| "valid_targets_min": 363 | |
| }, | |
| { | |
| "epoch": 4.754299754299755, | |
| "grad_norm": 1.1514337440143445, | |
| "learning_rate": 5.048764934915349e-06, | |
| "loss": 0.0964, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09842099249362946, | |
| "step": 1935, | |
| "valid_targets_mean": 574.4, | |
| "valid_targets_min": 354 | |
| }, | |
| { | |
| "epoch": 4.766584766584766, | |
| "grad_norm": 1.1230515925428892, | |
| "learning_rate": 4.954171933385805e-06, | |
| "loss": 0.0987, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09137429296970367, | |
| "step": 1940, | |
| "valid_targets_mean": 570.8, | |
| "valid_targets_min": 337 | |
| }, | |
| { | |
| "epoch": 4.778869778869779, | |
| "grad_norm": 1.2121531782344195, | |
| "learning_rate": 4.8603480514667836e-06, | |
| "loss": 0.1008, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09575438499450684, | |
| "step": 1945, | |
| "valid_targets_mean": 558.6, | |
| "valid_targets_min": 364 | |
| }, | |
| { | |
| "epoch": 4.791154791154791, | |
| "grad_norm": 1.1949570463904897, | |
| "learning_rate": 4.767298085290963e-06, | |
| "loss": 0.1025, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10193642973899841, | |
| "step": 1950, | |
| "valid_targets_mean": 609.2, | |
| "valid_targets_min": 330 | |
| }, | |
| { | |
| "epoch": 4.803439803439804, | |
| "grad_norm": 1.2296273425861448, | |
| "learning_rate": 4.675026791429624e-06, | |
| "loss": 0.1015, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10295425355434418, | |
| "step": 1955, | |
| "valid_targets_mean": 519.8, | |
| "valid_targets_min": 316 | |
| }, | |
| { | |
| "epoch": 4.815724815724816, | |
| "grad_norm": 1.1149494013206527, | |
| "learning_rate": 4.583538886649525e-06, | |
| "loss": 0.0955, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0870693176984787, | |
| "step": 1960, | |
| "valid_targets_mean": 525.1, | |
| "valid_targets_min": 360 | |
| }, | |
| { | |
| "epoch": 4.828009828009828, | |
| "grad_norm": 1.1527137920360166, | |
| "learning_rate": 4.492839047671764e-06, | |
| "loss": 0.1003, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10126141458749771, | |
| "step": 1965, | |
| "valid_targets_mean": 597.5, | |
| "valid_targets_min": 345 | |
| }, | |
| { | |
| "epoch": 4.84029484029484, | |
| "grad_norm": 1.285693094621376, | |
| "learning_rate": 4.4029319109327465e-06, | |
| "loss": 0.098, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10001290589570999, | |
| "step": 1970, | |
| "valid_targets_mean": 564.9, | |
| "valid_targets_min": 410 | |
| }, | |
| { | |
| "epoch": 4.8525798525798525, | |
| "grad_norm": 1.1948838524886878, | |
| "learning_rate": 4.313822072347136e-06, | |
| "loss": 0.0973, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10110676288604736, | |
| "step": 1975, | |
| "valid_targets_mean": 609.7, | |
| "valid_targets_min": 361 | |
| }, | |
| { | |
| "epoch": 4.864864864864865, | |
| "grad_norm": 1.0867537916477328, | |
| "learning_rate": 4.22551408707296e-06, | |
| "loss": 0.0965, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09373465180397034, | |
| "step": 1980, | |
| "valid_targets_mean": 669.0, | |
| "valid_targets_min": 400 | |
| }, | |
| { | |
| "epoch": 4.877149877149877, | |
| "grad_norm": 1.1751891930352016, | |
| "learning_rate": 4.138012469278714e-06, | |
| "loss": 0.1015, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09852367639541626, | |
| "step": 1985, | |
| "valid_targets_mean": 546.5, | |
| "valid_targets_min": 350 | |
| }, | |
| { | |
| "epoch": 4.88943488943489, | |
| "grad_norm": 1.256508408435214, | |
| "learning_rate": 4.051321691912649e-06, | |
| "loss": 0.1031, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10389189422130585, | |
| "step": 1990, | |
| "valid_targets_mean": 519.4, | |
| "valid_targets_min": 321 | |
| }, | |
| { | |
| "epoch": 4.901719901719901, | |
| "grad_norm": 1.154895355871411, | |
| "learning_rate": 3.9654461864740935e-06, | |
| "loss": 0.0991, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09166670590639114, | |
| "step": 1995, | |
| "valid_targets_mean": 577.4, | |
| "valid_targets_min": 324 | |
| }, | |
| { | |
| "epoch": 4.914004914004914, | |
| "grad_norm": 1.4017783580922674, | |
| "learning_rate": 3.880390342786915e-06, | |
| "loss": 0.0968, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09913163632154465, | |
| "step": 2000, | |
| "valid_targets_mean": 477.3, | |
| "valid_targets_min": 350 | |
| }, | |
| { | |
| "epoch": 4.926289926289926, | |
| "grad_norm": 1.1230883383998704, | |
| "learning_rate": 3.7961585087751516e-06, | |
| "loss": 0.0932, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0872161015868187, | |
| "step": 2005, | |
| "valid_targets_mean": 639.8, | |
| "valid_targets_min": 389 | |
| }, | |
| { | |
| "epoch": 4.938574938574939, | |
| "grad_norm": 1.2215546893236813, | |
| "learning_rate": 3.71275499024071e-06, | |
| "loss": 0.0997, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10271018743515015, | |
| "step": 2010, | |
| "valid_targets_mean": 554.2, | |
| "valid_targets_min": 349 | |
| }, | |
| { | |
| "epoch": 4.950859950859951, | |
| "grad_norm": 1.1371869103474366, | |
| "learning_rate": 3.6301840506433083e-06, | |
| "loss": 0.0942, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09029437601566315, | |
| "step": 2015, | |
| "valid_targets_mean": 575.8, | |
| "valid_targets_min": 378 | |
| }, | |
| { | |
| "epoch": 4.963144963144963, | |
| "grad_norm": 1.5723266744911633, | |
| "learning_rate": 3.5484499108824853e-06, | |
| "loss": 0.0978, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09650382399559021, | |
| "step": 2020, | |
| "valid_targets_mean": 605.9, | |
| "valid_targets_min": 300 | |
| }, | |
| { | |
| "epoch": 4.975429975429975, | |
| "grad_norm": 1.201414516571051, | |
| "learning_rate": 3.4675567490818727e-06, | |
| "loss": 0.1029, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10197515785694122, | |
| "step": 2025, | |
| "valid_targets_mean": 624.2, | |
| "valid_targets_min": 368 | |
| }, | |
| { | |
| "epoch": 4.987714987714988, | |
| "grad_norm": 1.1607939133459078, | |
| "learning_rate": 3.3875087003756036e-06, | |
| "loss": 0.0975, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09291709959506989, | |
| "step": 2030, | |
| "valid_targets_mean": 587.6, | |
| "valid_targets_min": 346 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 1.1476704642538418, | |
| "learning_rate": 3.30830985669691e-06, | |
| "loss": 0.0966, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08938758075237274, | |
| "step": 2035, | |
| "valid_targets_mean": 497.5, | |
| "valid_targets_min": 374 | |
| }, | |
| { | |
| "epoch": 5.012285012285012, | |
| "grad_norm": 1.0154622918334222, | |
| "learning_rate": 3.22996426656899e-06, | |
| "loss": 0.0854, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08019639551639557, | |
| "step": 2040, | |
| "valid_targets_mean": 645.4, | |
| "valid_targets_min": 318 | |
| }, | |
| { | |
| "epoch": 5.024570024570025, | |
| "grad_norm": 1.0873366416439045, | |
| "learning_rate": 3.1524759348980096e-06, | |
| "loss": 0.0836, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08791200816631317, | |
| "step": 2045, | |
| "valid_targets_mean": 640.9, | |
| "valid_targets_min": 353 | |
| }, | |
| { | |
| "epoch": 5.036855036855036, | |
| "grad_norm": 1.2422621105770808, | |
| "learning_rate": 3.0758488227684212e-06, | |
| "loss": 0.0828, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08541159331798553, | |
| "step": 2050, | |
| "valid_targets_mean": 473.1, | |
| "valid_targets_min": 315 | |
| }, | |
| { | |
| "epoch": 5.049140049140049, | |
| "grad_norm": 1.1245818830882286, | |
| "learning_rate": 3.0000868472404423e-06, | |
| "loss": 0.0889, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0811869204044342, | |
| "step": 2055, | |
| "valid_targets_mean": 620.8, | |
| "valid_targets_min": 350 | |
| }, | |
| { | |
| "epoch": 5.061425061425061, | |
| "grad_norm": 1.1981733121122524, | |
| "learning_rate": 2.9251938811498436e-06, | |
| "loss": 0.0868, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08584136515855789, | |
| "step": 2060, | |
| "valid_targets_mean": 559.6, | |
| "valid_targets_min": 366 | |
| }, | |
| { | |
| "epoch": 5.073710073710074, | |
| "grad_norm": 1.143387432773036, | |
| "learning_rate": 2.8511737529099704e-06, | |
| "loss": 0.0859, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08999934047460556, | |
| "step": 2065, | |
| "valid_targets_mean": 638.4, | |
| "valid_targets_min": 375 | |
| }, | |
| { | |
| "epoch": 5.085995085995086, | |
| "grad_norm": 1.143479733060532, | |
| "learning_rate": 2.7780302463160235e-06, | |
| "loss": 0.0875, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0899144858121872, | |
| "step": 2070, | |
| "valid_targets_mean": 600.1, | |
| "valid_targets_min": 305 | |
| }, | |
| { | |
| "epoch": 5.098280098280099, | |
| "grad_norm": 1.2493107418250313, | |
| "learning_rate": 2.705767100351673e-06, | |
| "loss": 0.0875, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08807513862848282, | |
| "step": 2075, | |
| "valid_targets_mean": 477.2, | |
| "valid_targets_min": 328 | |
| }, | |
| { | |
| "epoch": 5.11056511056511, | |
| "grad_norm": 1.277539917750501, | |
| "learning_rate": 2.634388008997899e-06, | |
| "loss": 0.0872, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09081301093101501, | |
| "step": 2080, | |
| "valid_targets_mean": 548.4, | |
| "valid_targets_min": 326 | |
| }, | |
| { | |
| "epoch": 5.122850122850123, | |
| "grad_norm": 1.041138079187501, | |
| "learning_rate": 2.5638966210441597e-06, | |
| "loss": 0.0817, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07195958495140076, | |
| "step": 2085, | |
| "valid_targets_mean": 630.1, | |
| "valid_targets_min": 398 | |
| }, | |
| { | |
| "epoch": 5.135135135135135, | |
| "grad_norm": 1.2357154232642422, | |
| "learning_rate": 2.4942965399018926e-06, | |
| "loss": 0.0865, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09714603424072266, | |
| "step": 2090, | |
| "valid_targets_mean": 696.2, | |
| "valid_targets_min": 365 | |
| }, | |
| { | |
| "epoch": 5.1474201474201475, | |
| "grad_norm": 1.2604606849246982, | |
| "learning_rate": 2.425591323420289e-06, | |
| "loss": 0.0867, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08725640177726746, | |
| "step": 2095, | |
| "valid_targets_mean": 555.8, | |
| "valid_targets_min": 287 | |
| }, | |
| { | |
| "epoch": 5.15970515970516, | |
| "grad_norm": 1.2025961132788572, | |
| "learning_rate": 2.357784483704444e-06, | |
| "loss": 0.0851, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0895378589630127, | |
| "step": 2100, | |
| "valid_targets_mean": 580.9, | |
| "valid_targets_min": 343 | |
| }, | |
| { | |
| "epoch": 5.171990171990172, | |
| "grad_norm": 1.253165445892313, | |
| "learning_rate": 2.2908794869358044e-06, | |
| "loss": 0.0835, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08695147931575775, | |
| "step": 2105, | |
| "valid_targets_mean": 505.1, | |
| "valid_targets_min": 352 | |
| }, | |
| { | |
| "epoch": 5.184275184275184, | |
| "grad_norm": 1.2289552360721567, | |
| "learning_rate": 2.2248797531949952e-06, | |
| "loss": 0.0891, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0879005640745163, | |
| "step": 2110, | |
| "valid_targets_mean": 531.9, | |
| "valid_targets_min": 321 | |
| }, | |
| { | |
| "epoch": 5.196560196560196, | |
| "grad_norm": 1.1908990615471768, | |
| "learning_rate": 2.1597886562869917e-06, | |
| "loss": 0.0859, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0877969041466713, | |
| "step": 2115, | |
| "valid_targets_mean": 577.0, | |
| "valid_targets_min": 337 | |
| }, | |
| { | |
| "epoch": 5.208845208845209, | |
| "grad_norm": 1.192626976342137, | |
| "learning_rate": 2.095609523568638e-06, | |
| "loss": 0.0842, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07734735310077667, | |
| "step": 2120, | |
| "valid_targets_mean": 562.0, | |
| "valid_targets_min": 329 | |
| }, | |
| { | |
| "epoch": 5.221130221130221, | |
| "grad_norm": 1.2082555924579998, | |
| "learning_rate": 2.0323456357785855e-06, | |
| "loss": 0.087, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0842600166797638, | |
| "step": 2125, | |
| "valid_targets_mean": 585.1, | |
| "valid_targets_min": 352 | |
| }, | |
| { | |
| "epoch": 5.233415233415234, | |
| "grad_norm": 1.2386435260363529, | |
| "learning_rate": 1.970000226869553e-06, | |
| "loss": 0.0878, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09272652119398117, | |
| "step": 2130, | |
| "valid_targets_mean": 524.5, | |
| "valid_targets_min": 320 | |
| }, | |
| { | |
| "epoch": 5.245700245700245, | |
| "grad_norm": 1.126056243469827, | |
| "learning_rate": 1.90857648384305e-06, | |
| "loss": 0.0806, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07395152747631073, | |
| "step": 2135, | |
| "valid_targets_mean": 619.2, | |
| "valid_targets_min": 313 | |
| }, | |
| { | |
| "epoch": 5.257985257985258, | |
| "grad_norm": 1.1666537336061145, | |
| "learning_rate": 1.848077546586431e-06, | |
| "loss": 0.0828, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08457744866609573, | |
| "step": 2140, | |
| "valid_targets_mean": 546.1, | |
| "valid_targets_min": 374 | |
| }, | |
| { | |
| "epoch": 5.27027027027027, | |
| "grad_norm": 1.199622886892468, | |
| "learning_rate": 1.7885065077123976e-06, | |
| "loss": 0.083, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08328172564506531, | |
| "step": 2145, | |
| "valid_targets_mean": 554.4, | |
| "valid_targets_min": 387 | |
| }, | |
| { | |
| "epoch": 5.282555282555283, | |
| "grad_norm": 1.1728412716455279, | |
| "learning_rate": 1.7298664124009245e-06, | |
| "loss": 0.0834, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08228134363889694, | |
| "step": 2150, | |
| "valid_targets_mean": 528.0, | |
| "valid_targets_min": 312 | |
| }, | |
| { | |
| "epoch": 5.294840294840295, | |
| "grad_norm": 1.1803113468201745, | |
| "learning_rate": 1.672160258243567e-06, | |
| "loss": 0.0826, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08139941096305847, | |
| "step": 2155, | |
| "valid_targets_mean": 572.4, | |
| "valid_targets_min": 356 | |
| }, | |
| { | |
| "epoch": 5.3071253071253075, | |
| "grad_norm": 1.2619621031879709, | |
| "learning_rate": 1.615390995090258e-06, | |
| "loss": 0.0868, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09053842723369598, | |
| "step": 2160, | |
| "valid_targets_mean": 577.2, | |
| "valid_targets_min": 350 | |
| }, | |
| { | |
| "epoch": 5.319410319410319, | |
| "grad_norm": 1.2248890955579523, | |
| "learning_rate": 1.559561524898492e-06, | |
| "loss": 0.0812, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08257773518562317, | |
| "step": 2165, | |
| "valid_targets_mean": 617.9, | |
| "valid_targets_min": 381 | |
| }, | |
| { | |
| "epoch": 5.3316953316953315, | |
| "grad_norm": 1.2467046188005135, | |
| "learning_rate": 1.5046747015849893e-06, | |
| "loss": 0.0875, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0834282860159874, | |
| "step": 2170, | |
| "valid_targets_mean": 543.5, | |
| "valid_targets_min": 387 | |
| }, | |
| { | |
| "epoch": 5.343980343980344, | |
| "grad_norm": 1.1148418333356434, | |
| "learning_rate": 1.4507333308798255e-06, | |
| "loss": 0.087, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07972699403762817, | |
| "step": 2175, | |
| "valid_targets_mean": 576.8, | |
| "valid_targets_min": 317 | |
| }, | |
| { | |
| "epoch": 5.356265356265356, | |
| "grad_norm": 1.0664970769867355, | |
| "learning_rate": 1.3977401701829752e-06, | |
| "loss": 0.0771, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07614714652299881, | |
| "step": 2180, | |
| "valid_targets_mean": 600.1, | |
| "valid_targets_min": 344 | |
| }, | |
| { | |
| "epoch": 5.368550368550369, | |
| "grad_norm": 1.0693404257105559, | |
| "learning_rate": 1.345697928423384e-06, | |
| "loss": 0.0871, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07640987634658813, | |
| "step": 2185, | |
| "valid_targets_mean": 665.3, | |
| "valid_targets_min": 353 | |
| }, | |
| { | |
| "epoch": 5.38083538083538, | |
| "grad_norm": 1.189930580211284, | |
| "learning_rate": 1.2946092659204767e-06, | |
| "loss": 0.0835, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09601159393787384, | |
| "step": 2190, | |
| "valid_targets_mean": 593.2, | |
| "valid_targets_min": 353 | |
| }, | |
| { | |
| "epoch": 5.393120393120393, | |
| "grad_norm": 1.1238320625578555, | |
| "learning_rate": 1.244476794248175e-06, | |
| "loss": 0.0843, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08733360469341278, | |
| "step": 2195, | |
| "valid_targets_mean": 635.1, | |
| "valid_targets_min": 374 | |
| }, | |
| { | |
| "epoch": 5.405405405405405, | |
| "grad_norm": 1.2770058118624072, | |
| "learning_rate": 1.1953030761014017e-06, | |
| "loss": 0.0834, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09015761315822601, | |
| "step": 2200, | |
| "valid_targets_mean": 556.9, | |
| "valid_targets_min": 293 | |
| }, | |
| { | |
| "epoch": 5.417690417690418, | |
| "grad_norm": 1.13370178694208, | |
| "learning_rate": 1.147090625165055e-06, | |
| "loss": 0.0834, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07899008691310883, | |
| "step": 2205, | |
| "valid_targets_mean": 598.9, | |
| "valid_targets_min": 346 | |
| }, | |
| { | |
| "epoch": 5.42997542997543, | |
| "grad_norm": 1.1383568574789888, | |
| "learning_rate": 1.0998419059855503e-06, | |
| "loss": 0.0868, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08398991823196411, | |
| "step": 2210, | |
| "valid_targets_mean": 588.1, | |
| "valid_targets_min": 355 | |
| }, | |
| { | |
| "epoch": 5.442260442260443, | |
| "grad_norm": 1.2374596577372317, | |
| "learning_rate": 1.053559333844798e-06, | |
| "loss": 0.0836, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0927569717168808, | |
| "step": 2215, | |
| "valid_targets_mean": 554.8, | |
| "valid_targets_min": 299 | |
| }, | |
| { | |
| "epoch": 5.454545454545454, | |
| "grad_norm": 1.060062165304812, | |
| "learning_rate": 1.0082452746367721e-06, | |
| "loss": 0.0798, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07243770360946655, | |
| "step": 2220, | |
| "valid_targets_mean": 688.8, | |
| "valid_targets_min": 412 | |
| }, | |
| { | |
| "epoch": 5.466830466830467, | |
| "grad_norm": 1.234886143589627, | |
| "learning_rate": 9.639020447465475e-07, | |
| "loss": 0.0847, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08597145974636078, | |
| "step": 2225, | |
| "valid_targets_mean": 531.4, | |
| "valid_targets_min": 368 | |
| }, | |
| { | |
| "epoch": 5.479115479115479, | |
| "grad_norm": 1.0957537106814175, | |
| "learning_rate": 9.205319109318922e-07, | |
| "loss": 0.0825, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07663384079933167, | |
| "step": 2230, | |
| "valid_targets_mean": 577.2, | |
| "valid_targets_min": 349 | |
| }, | |
| { | |
| "epoch": 5.4914004914004915, | |
| "grad_norm": 1.3059978658871396, | |
| "learning_rate": 8.781370902074049e-07, | |
| "loss": 0.0844, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0905524268746376, | |
| "step": 2235, | |
| "valid_targets_mean": 514.3, | |
| "valid_targets_min": 304 | |
| }, | |
| { | |
| "epoch": 5.503685503685504, | |
| "grad_norm": 1.145886936771296, | |
| "learning_rate": 8.367197497311719e-07, | |
| "loss": 0.0829, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08202914893627167, | |
| "step": 2240, | |
| "valid_targets_mean": 638.8, | |
| "valid_targets_min": 349 | |
| }, | |
| { | |
| "epoch": 5.515970515970516, | |
| "grad_norm": 1.1560278108403075, | |
| "learning_rate": 7.962820066939958e-07, | |
| "loss": 0.0832, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08465921878814697, | |
| "step": 2245, | |
| "valid_targets_mean": 617.2, | |
| "valid_targets_min": 330 | |
| }, | |
| { | |
| "epoch": 5.528255528255528, | |
| "grad_norm": 1.146827460792829, | |
| "learning_rate": 7.568259282111645e-07, | |
| "loss": 0.0835, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08355319499969482, | |
| "step": 2250, | |
| "valid_targets_mean": 589.8, | |
| "valid_targets_min": 408 | |
| }, | |
| { | |
| "epoch": 5.54054054054054, | |
| "grad_norm": 1.2678244005272907, | |
| "learning_rate": 7.183535312167755e-07, | |
| "loss": 0.0889, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08739691227674484, | |
| "step": 2255, | |
| "valid_targets_mean": 537.2, | |
| "valid_targets_min": 387 | |
| }, | |
| { | |
| "epoch": 5.552825552825553, | |
| "grad_norm": 1.4375448638349997, | |
| "learning_rate": 6.808667823606474e-07, | |
| "loss": 0.0791, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07598749548196793, | |
| "step": 2260, | |
| "valid_targets_mean": 557.4, | |
| "valid_targets_min": 344 | |
| }, | |
| { | |
| "epoch": 5.565110565110565, | |
| "grad_norm": 1.2784791536932771, | |
| "learning_rate": 6.443675979077779e-07, | |
| "loss": 0.0836, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09129341691732407, | |
| "step": 2265, | |
| "valid_targets_mean": 611.4, | |
| "valid_targets_min": 355 | |
| }, | |
| { | |
| "epoch": 5.577395577395578, | |
| "grad_norm": 1.146114809603193, | |
| "learning_rate": 6.088578436403847e-07, | |
| "loss": 0.0777, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07668814063072205, | |
| "step": 2270, | |
| "valid_targets_mean": 548.8, | |
| "valid_targets_min": 333 | |
| }, | |
| { | |
| "epoch": 5.58968058968059, | |
| "grad_norm": 1.1701260037394332, | |
| "learning_rate": 5.743393347625436e-07, | |
| "loss": 0.0809, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0863061472773552, | |
| "step": 2275, | |
| "valid_targets_mean": 631.6, | |
| "valid_targets_min": 328 | |
| }, | |
| { | |
| "epoch": 5.601965601965602, | |
| "grad_norm": 1.1336198412288852, | |
| "learning_rate": 5.408138358073833e-07, | |
| "loss": 0.0829, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07773379981517792, | |
| "step": 2280, | |
| "valid_targets_mean": 707.2, | |
| "valid_targets_min": 335 | |
| }, | |
| { | |
| "epoch": 5.614250614250614, | |
| "grad_norm": 1.1732238369009445, | |
| "learning_rate": 5.082830605468969e-07, | |
| "loss": 0.0816, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0849945917725563, | |
| "step": 2285, | |
| "valid_targets_mean": 632.2, | |
| "valid_targets_min": 278 | |
| }, | |
| { | |
| "epoch": 5.6265356265356266, | |
| "grad_norm": 1.1247949445590306, | |
| "learning_rate": 4.767486719043235e-07, | |
| "loss": 0.0834, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07646386325359344, | |
| "step": 2290, | |
| "valid_targets_mean": 657.5, | |
| "valid_targets_min": 409 | |
| }, | |
| { | |
| "epoch": 5.638820638820639, | |
| "grad_norm": 1.2775182803680445, | |
| "learning_rate": 4.4621228186915833e-07, | |
| "loss": 0.0824, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08856301754713058, | |
| "step": 2295, | |
| "valid_targets_mean": 620.8, | |
| "valid_targets_min": 401 | |
| }, | |
| { | |
| "epoch": 5.651105651105651, | |
| "grad_norm": 1.2333249170791625, | |
| "learning_rate": 4.166754514147275e-07, | |
| "loss": 0.084, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08326977491378784, | |
| "step": 2300, | |
| "valid_targets_mean": 534.1, | |
| "valid_targets_min": 379 | |
| }, | |
| { | |
| "epoch": 5.663390663390663, | |
| "grad_norm": 1.1108265985271855, | |
| "learning_rate": 3.881396904184231e-07, | |
| "loss": 0.0791, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07909250259399414, | |
| "step": 2305, | |
| "valid_targets_mean": 623.9, | |
| "valid_targets_min": 372 | |
| }, | |
| { | |
| "epoch": 5.675675675675675, | |
| "grad_norm": 1.1071999111918622, | |
| "learning_rate": 3.6060645758449584e-07, | |
| "loss": 0.0793, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06771372258663177, | |
| "step": 2310, | |
| "valid_targets_mean": 535.7, | |
| "valid_targets_min": 328 | |
| }, | |
| { | |
| "epoch": 5.687960687960688, | |
| "grad_norm": 1.2811978558463222, | |
| "learning_rate": 3.34077160369497e-07, | |
| "loss": 0.0851, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08796574175357819, | |
| "step": 2315, | |
| "valid_targets_mean": 582.8, | |
| "valid_targets_min": 272 | |
| }, | |
| { | |
| "epoch": 5.7002457002457, | |
| "grad_norm": 1.1575219121136995, | |
| "learning_rate": 3.08553154910336e-07, | |
| "loss": 0.0877, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08139719069004059, | |
| "step": 2320, | |
| "valid_targets_mean": 620.1, | |
| "valid_targets_min": 372 | |
| }, | |
| { | |
| "epoch": 5.712530712530713, | |
| "grad_norm": 1.2556732274676712, | |
| "learning_rate": 2.840357459549492e-07, | |
| "loss": 0.0809, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08443877846002579, | |
| "step": 2325, | |
| "valid_targets_mean": 542.2, | |
| "valid_targets_min": 399 | |
| }, | |
| { | |
| "epoch": 5.724815724815725, | |
| "grad_norm": 1.259874729323289, | |
| "learning_rate": 2.6052618679560884e-07, | |
| "loss": 0.085, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09191766381263733, | |
| "step": 2330, | |
| "valid_targets_mean": 461.4, | |
| "valid_targets_min": 322 | |
| }, | |
| { | |
| "epoch": 5.737100737100737, | |
| "grad_norm": 1.1917534753808974, | |
| "learning_rate": 2.380256792048541e-07, | |
| "loss": 0.0789, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07446075975894928, | |
| "step": 2335, | |
| "valid_targets_mean": 511.6, | |
| "valid_targets_min": 342 | |
| }, | |
| { | |
| "epoch": 5.749385749385749, | |
| "grad_norm": 1.2373562089047119, | |
| "learning_rate": 2.1653537337405383e-07, | |
| "loss": 0.0823, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08613431453704834, | |
| "step": 2340, | |
| "valid_targets_mean": 577.6, | |
| "valid_targets_min": 396 | |
| }, | |
| { | |
| "epoch": 5.761670761670762, | |
| "grad_norm": 1.2034405427568784, | |
| "learning_rate": 1.9605636785462234e-07, | |
| "loss": 0.0795, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08189545571804047, | |
| "step": 2345, | |
| "valid_targets_mean": 621.9, | |
| "valid_targets_min": 365 | |
| }, | |
| { | |
| "epoch": 5.773955773955774, | |
| "grad_norm": 1.0799325407284226, | |
| "learning_rate": 1.7658970950185095e-07, | |
| "loss": 0.0809, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07476474344730377, | |
| "step": 2350, | |
| "valid_targets_mean": 609.9, | |
| "valid_targets_min": 321 | |
| }, | |
| { | |
| "epoch": 5.7862407862407865, | |
| "grad_norm": 1.1757906849036914, | |
| "learning_rate": 1.5813639342140197e-07, | |
| "loss": 0.0815, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08601002395153046, | |
| "step": 2355, | |
| "valid_targets_mean": 550.2, | |
| "valid_targets_min": 379 | |
| }, | |
| { | |
| "epoch": 5.798525798525798, | |
| "grad_norm": 1.188427364444162, | |
| "learning_rate": 1.4069736291843605e-07, | |
| "loss": 0.0809, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07079163193702698, | |
| "step": 2360, | |
| "valid_targets_mean": 589.3, | |
| "valid_targets_min": 338 | |
| }, | |
| { | |
| "epoch": 5.8108108108108105, | |
| "grad_norm": 1.2501121164250937, | |
| "learning_rate": 1.242735094493952e-07, | |
| "loss": 0.0839, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08723746240139008, | |
| "step": 2365, | |
| "valid_targets_mean": 535.8, | |
| "valid_targets_min": 373 | |
| }, | |
| { | |
| "epoch": 5.823095823095823, | |
| "grad_norm": 1.2533918670853614, | |
| "learning_rate": 1.0886567257643033e-07, | |
| "loss": 0.082, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08290581405162811, | |
| "step": 2370, | |
| "valid_targets_mean": 550.7, | |
| "valid_targets_min": 360 | |
| }, | |
| { | |
| "epoch": 5.835380835380835, | |
| "grad_norm": 1.1564818042371932, | |
| "learning_rate": 9.447463992448891e-08, | |
| "loss": 0.0847, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08502306044101715, | |
| "step": 2375, | |
| "valid_targets_mean": 659.5, | |
| "valid_targets_min": 325 | |
| }, | |
| { | |
| "epoch": 5.847665847665848, | |
| "grad_norm": 1.1755092919019083, | |
| "learning_rate": 8.110114714104277e-08, | |
| "loss": 0.0819, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07557385414838791, | |
| "step": 2380, | |
| "valid_targets_mean": 598.8, | |
| "valid_targets_min": 342 | |
| }, | |
| { | |
| "epoch": 5.85995085995086, | |
| "grad_norm": 1.1962496012954607, | |
| "learning_rate": 6.874587785849152e-08, | |
| "loss": 0.0812, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07918815314769745, | |
| "step": 2385, | |
| "valid_targets_mean": 603.8, | |
| "valid_targets_min": 377 | |
| }, | |
| { | |
| "epoch": 5.872235872235873, | |
| "grad_norm": 1.306024990482639, | |
| "learning_rate": 5.7409463659219286e-08, | |
| "loss": 0.0845, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08919475227594376, | |
| "step": 2390, | |
| "valid_targets_mean": 493.2, | |
| "valid_targets_min": 303 | |
| }, | |
| { | |
| "epoch": 5.884520884520884, | |
| "grad_norm": 1.2110406679406136, | |
| "learning_rate": 4.709248404329625e-08, | |
| "loss": 0.0805, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08352182805538177, | |
| "step": 2395, | |
| "valid_targets_mean": 576.7, | |
| "valid_targets_min": 314 | |
| }, | |
| { | |
| "epoch": 5.896805896805897, | |
| "grad_norm": 1.1494918965138259, | |
| "learning_rate": 3.7795466398868885e-08, | |
| "loss": 0.0807, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07362495362758636, | |
| "step": 2400, | |
| "valid_targets_mean": 632.3, | |
| "valid_targets_min": 376 | |
| }, | |
| { | |
| "epoch": 5.909090909090909, | |
| "grad_norm": 1.2125273811964747, | |
| "learning_rate": 2.9518885975192702e-08, | |
| "loss": 0.0813, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0766986608505249, | |
| "step": 2405, | |
| "valid_targets_mean": 540.9, | |
| "valid_targets_min": 298 | |
| }, | |
| { | |
| "epoch": 5.921375921375922, | |
| "grad_norm": 1.3055089690040842, | |
| "learning_rate": 2.226316585833832e-08, | |
| "loss": 0.088, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09940752387046814, | |
| "step": 2410, | |
| "valid_targets_mean": 630.2, | |
| "valid_targets_min": 366 | |
| }, | |
| { | |
| "epoch": 5.933660933660933, | |
| "grad_norm": 1.2734724231764385, | |
| "learning_rate": 1.6028676949570997e-08, | |
| "loss": 0.0863, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08724012225866318, | |
| "step": 2415, | |
| "valid_targets_mean": 496.8, | |
| "valid_targets_min": 343 | |
| }, | |
| { | |
| "epoch": 5.945945945945946, | |
| "grad_norm": 1.3140331206962972, | |
| "learning_rate": 1.0815737946383575e-08, | |
| "loss": 0.0797, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0807531550526619, | |
| "step": 2420, | |
| "valid_targets_mean": 511.9, | |
| "valid_targets_min": 355 | |
| }, | |
| { | |
| "epoch": 5.958230958230958, | |
| "grad_norm": 1.2133753164899292, | |
| "learning_rate": 6.624615326207284e-09, | |
| "loss": 0.086, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08304368704557419, | |
| "step": 2425, | |
| "valid_targets_mean": 509.8, | |
| "valid_targets_min": 319 | |
| }, | |
| { | |
| "epoch": 5.9705159705159705, | |
| "grad_norm": 1.2628163299904318, | |
| "learning_rate": 3.4555233327893124e-09, | |
| "loss": 0.0888, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08596052974462509, | |
| "step": 2430, | |
| "valid_targets_mean": 489.0, | |
| "valid_targets_min": 355 | |
| }, | |
| { | |
| "epoch": 5.982800982800983, | |
| "grad_norm": 1.1137438849834025, | |
| "learning_rate": 1.3086239652415621e-09, | |
| "loss": 0.0785, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07572906464338303, | |
| "step": 2435, | |
| "valid_targets_mean": 634.5, | |
| "valid_targets_min": 374 | |
| }, | |
| { | |
| "epoch": 5.995085995085995, | |
| "grad_norm": 1.256225517041073, | |
| "learning_rate": 1.840269697628294e-10, | |
| "loss": 0.0843, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08771345764398575, | |
| "step": 2440, | |
| "valid_targets_mean": 510.3, | |
| "valid_targets_min": 316 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08076979219913483, | |
| "step": 2442, | |
| "total_flos": 140364736167936.0, | |
| "train_loss": 0.16821050625614803, | |
| "train_runtime": 7348.4915, | |
| "train_samples_per_second": 5.31, | |
| "train_steps_per_second": 0.332, | |
| "valid_targets_mean": 624.3, | |
| "valid_targets_min": 380 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 2442, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 6, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 140364736167936.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |